wjakethompson
diff --git a/‎.github/workflows/lint.yaml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/lint.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.lintr
Lines changed: 2 additions & 1 deletion b/‎.lintr
Lines changed: 2 additions & 1 deletion
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/discrimination.R
Lines changed: 174 additions & 0 deletions b/‎R/discrimination.R
Lines changed: 174 additions & 0 deletions
diff --git a/‎R/model-evaluation.R
Lines changed: 4 additions & 3 deletions b/‎R/model-evaluation.R
Lines changed: 4 additions & 3 deletions
diff --git a/‎R/reliability.R
Lines changed: 30 additions & 4 deletions b/‎R/reliability.R
Lines changed: 30 additions & 4 deletions
diff --git a/‎R/utils-reliability.R
Lines changed: 13 additions & 3 deletions b/‎R/utils-reliability.R
Lines changed: 13 additions & 3 deletions
diff --git a/‎inst/WORDLIST
Lines changed: 5 additions & 0 deletions b/‎inst/WORDLIST
Lines changed: 5 additions & 0 deletions
@@ -22,7 +22,7 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::lintr, local::.
+          extra-packages: r-lib/lintr, local::.
           needs: lint
 
       - name: Lint
 
@@ -1,5 +1,6 @@
 linters: linters_with_defaults(
-    indentation_linter = NULL
+    indentation_linter = NULL,
+    return_linter = NULL
   )
 exclusions: list(
     "R/stanmodels.R",
 
@@ -24,6 +24,7 @@ export(as_draws)
 export(as_label)
 export(as_measrfit)
 export(as_name)
+export(cdi)
 export(create_profiles)
 export(default_dcm_priors)
 export(enquo)
 
@@ -0,0 +1,174 @@
+#' Item, attribute, and test-level discrimination indices
+#'
+#' The cognitive diagnostic index (CDI) is a measure of how well an assessment
+#' is able to distinguish between attribute profiles. The index was originally
+#' proposed by Henson & Douglas (2005) for item- and test-level discrimination,
+#' and then expanded by Henson et al. (2008) to include attribute-level
+#' discrimination indices.
+#'
+#' @param model The estimated model to be evaluated.
+#' @param weight_prevalence Logical indicating whether the discrimination
+#'   indices should be weighted by the prevalence of the attribute profiles. See
+#'   details for additional information.
+#'
+#' @details
+#' Henson et al. (2008) described two attribute-level discrimination indices,
+#' \eqn{\mathbf{d}_{(A)\mathbf{\cdot}}} (Equation 8) and
+#' \eqn{\mathbf{d}_{(B)\mathbf{\cdot}}} (Equation 13), which are similar in that
+#' both are the sum of item-level discrimination indices.
+#' In both cases, item-level discrimination indices are calculated as the
+#' average of Kullback-Leibler information for all pairs of attribute profiles
+#' for the item.
+#' The item-level indices are then summed to achieve the test-level
+#' discrimination index for each attribute, or the test overall.
+#' However, whereas \eqn{\mathbf{d}_{(A)\mathbf{\cdot}}} is an unweighted
+#' average of the Kullback-Leibler information,
+#' \eqn{\mathbf{d}_{(B)\mathbf{\cdot}}} is a weighted average, where the weight
+#' is defined by the prevalence of each profile (i.e.,
+#' [`measr_extract(model, what = "strc_param")`][measr_extract()]).
+#'
+#' @return A list with two elements:
+#'   * `item_discrimination`: A [tibble][tibble::tibble-package] with one row
+#'     per item containing the CDI for the item and any relevant attributes.
+#'   * `test_discrimination`: A [tibble][tibble::tibble-package] with one row
+#'     containing the total CDI for the assessment and for each attribute.
+#' @export
+#'
+#' @references Henson, R., & Douglas, J. (2005). Test construction for cognitive
+#'   diagnosis. *Applied Psychological Measurement, 29*(4), 262-277.
+#'   \doi{10.1177/0146621604272623}
+#' @references Henson, R., Roussos, L., Douglas, J., & He, X. (2008).
+#'   Cognitive diagnostic attribute-level discrimination indices.
+#'   *Applied Psychological Measurement, 32*(4), 275-288.
+#'   \doi{10.1177/0146621607302478}
+#' @examplesIf measr_examples()
+#' rstn_ecpe_lcdm <- measr_dcm(
+#'   data = ecpe_data, missing = NA, qmatrix = ecpe_qmatrix,
+#'   resp_id = "resp_id", item_id = "item_id", type = "lcdm",
+#'   method = "optim", seed = 63277, backend = "rstan"
+#' )
+#'
+#' cdi(rstn_ecpe_lcdm)
+cdi <- function(model, weight_prevalence = TRUE) {
+  model <- check_model(model, required_class = "measrfit", name = "model")
+  weight_prevalence <- check_logical(weight_prevalence,
+                                     name = "weight_prevalence")
+
+  # `switch()` returns `NULL` invisibly when a character `EXPR` matches no arm,
+  # which would only surface later as an unrelated error. Fail here instead.
+  stan_draws <- switch(model$method,
+                       "mcmc" = get_mcmc_draws(model),
+                       "optim" = get_optim_draws(model),
+                       rlang::abort(paste0("Unsupported estimation method: ",
+                                           model$method)))
+
+  # Average every `pi[item,class]` variable over the draws and split the
+  # variable name into integer item and class indices.
+  pi_matrix <- stan_draws %>%
+    posterior::subset_draws(variable = "pi") %>%
+    posterior::as_draws_df() %>%
+    tibble::as_tibble() %>%
+    tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw")) %>%
+    dplyr::summarize(value = mean(.data$value), .by = "name") %>%
+    tidyr::separate_wider_regex(
+      cols = "name",
+      patterns = c("pi\\[", item = "[0-9]*", ",", class = "[0-9]*", "\\]")
+    ) %>%
+    dplyr::mutate(item = as.integer(.data$item),
+                  class = as.integer(.data$class))
+
+  # One row per ordered pair of profiles: the Hamming distance between them
+  # plus one logical column per attribute flagging whether the pair differs on
+  # that attribute.
+  hamming <- profile_hamming(
+    dplyr::select(measr_extract(model, "classes"), -"class")
+  )
+  att_names <- hamming %>%
+    dplyr::select(-c("profile_1", "profile_2", "hamming")) %>%
+    colnames()
+
+  # For every item and ordered profile pair, look up both `pi` values and
+  # compute `kli` from them (the KL formula for two binary outcomes with
+  # probabilities `prob_1` and `prob_2`).
+  item_discrim <- tidyr::crossing(item = unique(pi_matrix$item),
+                                  profile_1 = unique(pi_matrix$class),
+                                  profile_2 = unique(pi_matrix$class)) %>%
+    dplyr::left_join(pi_matrix, by = c("item", "profile_1" = "class"),
+                     relationship = "many-to-one") %>%
+    dplyr::rename("prob_1" = "value") %>%
+    dplyr::left_join(pi_matrix, by = c("item", "profile_2" = "class"),
+                     relationship = "many-to-one") %>%
+    dplyr::rename("prob_2" = "value") %>%
+    dplyr::mutate(kli = (.data$prob_1 * log(.data$prob_1 / .data$prob_2)) +
+                    ((1 - .data$prob_1) *
+                       log((1 - .data$prob_1) / (1 - .data$prob_2)))) %>%
+    dplyr::left_join(hamming, by = c("profile_1", "profile_2"),
+                     relationship = "many-to-one") %>%
+    # Attribute columns: keep `kli` only for pairs that differ on that
+    # attribute alone (Hamming distance of 1); every other pair becomes `NA`
+    # and is dropped from the attribute-level average below.
+    dplyr::mutate(dplyr::across(dplyr::where(is.logical),
+                                \(x) {
+                                  dplyr::case_when(
+                                    x & .data$hamming == 1L ~ TRUE,
+                                    .default = NA
+                                  )
+                                }),
+                  dplyr::across(dplyr::where(is.logical),
+                                \(x) as.integer(x) * .data$kli)) %>%
+    # Drop each profile paired with itself, then weight the remaining pairs by
+    # the inverse of their Hamming distance.
+    dplyr::filter(.data$hamming > 0) %>%
+    dplyr::mutate(weight = 1 / .data$hamming)
+
+  if (weight_prevalence) {
+    # Average each `log_Vc[class]` variable over the draws and exponentiate it
+    # to get the per-class weight multiplier.
+    vc <- stan_draws %>%
+      posterior::subset_draws(variable = "log_Vc") %>%
+      posterior::as_draws_df() %>%
+      tibble::as_tibble() %>%
+      tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw")) %>%
+      dplyr::summarize(value = mean(.data$value), .by = "name") %>%
+      dplyr::mutate(value = exp(.data$value)) %>%
+      tidyr::separate_wider_regex(
+        cols = "name",
+        patterns = c("log_Vc\\[", class = "[0-9]*", "\\]")
+      ) %>%
+      dplyr::mutate(class = as.integer(.data$class))
+
+    # `vc` has one row per class, so guard against accidental fan-out in the
+    # same way as the other joins in this function.
+    item_discrim <- item_discrim %>%
+      dplyr::left_join(vc, by = c("profile_1" = "class"),
+                       relationship = "many-to-one") %>%
+      dplyr::mutate(weight = .data$weight * .data$value) %>%
+      dplyr::select(-"value")
+  }
+
+  # Item level: weighted mean of `kli` overall and for each attribute column.
+  item_discrim <- item_discrim %>%
+    dplyr::summarize(
+      overall = stats::weighted.mean(.data$kli, w = .data$weight),
+      dplyr::across(
+        dplyr::all_of(att_names),
+        \(x) stats::weighted.mean(x, w = .data$weight, na.rm = TRUE)
+      ),
+      .by = "item"
+    )
+
+  # Test level: sum of the item-level indices.
+  test_discrim <- item_discrim %>%
+    dplyr::summarize(dplyr::across(-"item", sum))
+
+  return(
+    list(item_discrimination = item_discrim,
+         test_discrimination = test_discrim)
+  )
+}
+
+# Compare every ordered pair of rows in `profiles`.
+#
+# Returns the pair indices (`profile_1`, `profile_2`) column-bound to the
+# output of `hamming_distance()` for that pair.
+profile_hamming <- function(profiles) {
+  row_ids <- seq_len(nrow(profiles))
+  pairs <- tidyr::crossing(profile_1 = row_ids, profile_2 = row_ids)
+
+  # One single-row result per pair, in the same order as `pairs`.
+  pair_results <- lapply(
+    seq_len(nrow(pairs)),
+    \(i) {
+      hamming_distance(pairs$profile_1[i], pairs$profile_2[i],
+                       profiles = profiles)
+    }
+  )
+
+  dplyr::bind_cols(pairs, dplyr::bind_rows(pair_results))
+}
+
+# Compare rows `prof1` and `prof2` of `profiles` column by column.
+#
+# Returns a single-row result holding `hamming` (the number of columns on
+# which the two rows differ) followed by one logical column per original
+# column that is `TRUE` where the rows differ.
+hamming_distance <- function(prof1, prof2, profiles) {
+  long_1 <- tidyr::pivot_longer(profiles[prof1, ],
+                                cols = dplyr::everything(),
+                                names_to = "att", values_to = "patt1")
+  long_2 <- tidyr::pivot_longer(profiles[prof2, ],
+                                cols = dplyr::everything(),
+                                names_to = "att", values_to = "patt2")
+
+  # Line the two rows up by column name before comparing values.
+  compared <- dplyr::left_join(long_1, long_2, by = "att",
+                               relationship = "one-to-one")
+  compared <- dplyr::mutate(compared,
+                            mismatch = .data$patt1 != .data$patt2,
+                            hamming = sum(.data$mismatch))
+  compared <- dplyr::select(compared, "att", "mismatch", "hamming")
+
+  tidyr::pivot_wider(compared, names_from = "att", values_from = "mismatch")
+}
@@ -50,7 +50,8 @@
 #' `$reliability` element of the fitted model. Pattern level reliability is
 #' described by Cui et al. (2012). Classification reliability and posterior
 #' probability reliability are described by Johnson & Sinharay (2018, 2020),
-#' respectively. This function wraps [reliability()].
+#' respectively. This function wraps [reliability()]. Arguments supplied to
+#' `...` are passed to [reliability()].
 #'
 #' @return A modified [measrfit] object with the corresponding slot populated
 #'   with the specified information.
@@ -160,7 +161,7 @@ add_criterion <- function(x, criterion = c("loo", "waic"), overwrite = FALSE,
 
 #' @export
 #' @rdname model_evaluation
-add_reliability <- function(x, overwrite = FALSE, save = TRUE) {
+add_reliability <- function(x, overwrite = FALSE, save = TRUE, ...) {
   model <- check_model(x, required_class = "measrfit", name = "x")
   overwrite <- check_logical(overwrite, name = "overwrite")
   save <- check_logical(save, name = "force_save")
@@ -169,7 +170,7 @@ add_reliability <- function(x, overwrite = FALSE, save = TRUE) {
   run_reli <- length(model$reliability) == 0 || overwrite
 
   if (run_reli) {
-    model$reliability <- reliability(model)
+    model$reliability <- reliability(model, force = TRUE, ...)
   }
 
   # re-save model object (if applicable)
 
@@ -28,7 +28,11 @@ reliability <- function(model, ...) {
 #'
 #' @param threshold For `map_reliability`, the threshold applied to the
 #'  attribute-level probabilities for determining the binary attribute
-#'  classifications.
+#'  classifications. Should be a numeric vector of length 1 (the same threshold
+#'  is applied to all attributes), or length equal to the number of attributes.
+#'  If a named vector is supplied, names should match the attribute names in the
+#'  Q-matrix used to estimate the model. If unnamed, thresholds should be in the
+#'  order the attributes were defined in the Q-matrix.
 #'
 #' @details The pattern-level reliability (`pattern_reliability`) statistics are
 #' described in Cui et al. (2012). Attribute-level classification reliability
@@ -71,16 +75,38 @@ reliability <- function(model, ...) {
 #'
 #' reliability(rstn_mdm_lcdm)
 reliability.measrdcm <- function(model, ..., threshold = 0.5, force = FALSE) {
+  threshold <- check_double(threshold, lb = 0, ub = 1, inclusive = FALSE,
+                            name = "threshold")
+  force <- check_logical(force, name = "force")
+
+  att_names <- colnames(dplyr::select(model$data$qmatrix, -"item_id"))
+  if (length(threshold) == 1) {
+    threshold <- rep(threshold, times = length(att_names)) %>%
+      rlang::set_names(att_names)
+  } else if (length(threshold) == length(att_names)) {
+    if (is.null(names(threshold))) {
+      threshold <- rlang::set_names(threshold, att_names)
+    } else if (!all(names(threshold) %in% att_names)) {
+      bad_names <- setdiff(names(threshold), att_names)
+      rlang::abort(
+        message = glue::glue("Unknown attribute names provided: ",
+                             "{paste(bad_names, collapse = ', ')}")
+      )
+    }
+  } else {
+    rlang::abort(
+      message = glue::glue("`threshold` must be of length 1 or length ",
+                           "{length(att_names)} (the number of attributes).")
+    )
+  }
+
   if ((!is.null(model$reliability) && length(model$reliability) > 0) &&
       !force) {
     return(model$reliability)
   }
 
   # coerce model into a list of values required for reliability
   obj <- reli_list(model, threshold = threshold)
-  att_names <- model$data$qmatrix %>%
-    dplyr::select(-"item_id") %>%
-    colnames()
 
   tbl <- obj$acc
   p <- obj$prev
 
@@ -159,9 +159,19 @@ reli_list <- function(model, threshold) {
 
   # map estimates
   binary_att <- attr_probs %>%
-    dplyr::mutate(dplyr::across(dplyr::everything(),
-                                ~dplyr::case_when(.x >= threshold ~ 1L,
-                                                  TRUE ~ 0L))) %>%
+    tibble::rowid_to_column(var = "resp_id") %>%
+    tidyr::pivot_longer(cols = -"resp_id",
+                        names_to = "attribute", values_to = "probability") %>%
+    dplyr::left_join(tibble::enframe(threshold, name = "attribute",
+                                     value = "threshold"),
+                     by = "attribute",
+                     relationship = "many-to-one") %>%
+    dplyr::mutate(class = dplyr::case_when(.data$probability >=
+                                             .data$threshold ~ 1L,
+                                           .default = 0L)) %>%
+    dplyr::select("resp_id", "attribute", "class") %>%
+    tidyr::pivot_wider(names_from = "attribute", values_from = "class") %>%
+    dplyr::select(dplyr::all_of(names(threshold))) %>%
     as.matrix() %>%
     unname() %>%
     as.vector()
 
@@ -1,3 +1,4 @@
+CDI
 CDM
 CDMs
 CMD
@@ -17,7 +18,9 @@ Gelman
 Gierl
 HDCM
 Kruskal's
+Kullback
 LCDM
+Leibler
 Liu
 MDM
 MacReady
@@ -33,6 +36,7 @@ PPMCs
 Psychometrika
 RMSEA
 RStan
+Roussos
 Rtools
 Rupp
 SRMSR
@@ -45,6 +49,7 @@ WAIC
 Watanabe
 Xcode
 Xin
+Xuming
 Youden's
 al
 att
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`linters: linters_with_defaults(`
`2`		`- indentation_linter = NULL`
	`2`	`+ indentation_linter = NULL,`
	`3`	`+ return_linter = NULL`
`3`	`4`	`)`
`4`	`5`	`exclusions: list(`
`5`	`6`	`"R/stanmodels.R",`