|
| 1 | +#' Item, attribute, and test-level discrimination indices |
| 2 | +#' |
| 3 | +#' The cognitive diagnostic index (CDI) is a measure of how well an assessment |
| 4 | +#' is able to distinguish between attribute profiles. The index was originally |
| 5 | +#' proposed by Henson & Douglas (2005) for item- and test-level discrimination, |
| 6 | +#' and then expanded by Henson et al. (2008) to include attribute-level |
| 7 | +#' discrimination indices. |
| 8 | +#' |
| 9 | +#' @param model The estimated model to be evaluated. |
| 10 | +#' @param weight_prevalence Logical indicating whether the discrimination |
| 11 | +#' indices should be weighted by the prevalence of the attribute profiles. See |
| 12 | +#' details for additional information. |
| 13 | +#' |
| 14 | +#' @details |
| 15 | +#' Henson et al. (2008) described two attribute-level discrimination indices, |
| 16 | +#' \eqn{\mathbf{d}_{(A)\mathbf{\cdot}}} (Equation 8) and |
| 17 | +#' \eqn{\mathbf{d}_{(B)\mathbf{\cdot}}} (Equation 13), which are similar in that |
| 18 | +#' both are the sum of item-level discrimination indices. |
| 19 | +#' In both cases, item-level discrimination indices are calculated as the |
| 20 | +#' average of Kullback-Leibler information for all pairs of attributes profiles |
| 21 | +#' for the item. |
| 22 | +#' The item-level indices are then summed to achieve the test-level |
| 23 | +#' discrimination index for each attribute, or the test overall. |
| 24 | +#' However, whereas \eqn{\mathbf{d}_{(A)\mathbf{\cdot}}} is an unweighted |
| 25 | +#' average of the Kullback-Leibler information, |
| 26 | +#' \eqn{\mathbf{d}_{(B)\mathbf{\cdot}}} is a weighted average, where the weight |
| 27 | +#' is defined by the prevalence of each profile (i.e., |
| 28 | +#' [`measr_extract(model, what = "strc_param")`][measr_extract()]). |
| 29 | +#' |
| 30 | +#' @return A list with two elements: |
| 31 | +#' * `item_discrimination`: A [tibble][tibble::tibble-package] with one row |
| 32 | +#' per item containing the CDI for the item and any relevant attributes. |
| 33 | +#' * `test_discrimination`: A [tibble][tibble::tibble-package] with one row |
| 34 | +#' containing the total CDI for the assessment and for each attribute. |
| 35 | +#' @export |
| 36 | +#' |
| 37 | +#' @references Henson, R., & Douglas, J. (2005). Test construction for cognitive |
| 38 | +#' diagnosis. *Applied Psychological Measurement, 29*(4), 262-277. |
| 39 | +#' \doi{10.1177/0146621604272623} |
| 40 | +#' @references Henson, R., Roussos, L., Douglas, J., & Xuming, H. (2008). |
| 41 | +#' Cognitive diagnostic attribute-level discrimination indices. |
| 42 | +#' *Applied Psychological Measurement, 32*(4), 275-288. |
| 43 | +#' \doi{10.1177/0146621607302478} |
| 44 | +#' @examplesIf measr_examples() |
| 45 | +#' rstn_ecpe_lcdm <- measr_dcm( |
| 46 | +#' data = ecpe_data, missing = NA, qmatrix = ecpe_qmatrix, |
| 47 | +#' resp_id = "resp_id", item_id = "item_id", type = "lcdm", |
| 48 | +#' method = "optim", seed = 63277, backend = "rstan" |
| 49 | +#' ) |
| 50 | +#' |
| 51 | +#' cdi(rstn_ecpe_lcdm) |
| 52 | +cdi <- function(model, weight_prevalence = TRUE) { |
| 53 | + model <- check_model(model, required_class = "measrfit", name = "model") |
| 54 | + weight_prevalence <- check_logical(weight_prevalence, |
| 55 | + name = "weight_prevalence") |
| 56 | + |
| 57 | + stan_draws <- switch(model$method, |
| 58 | + "mcmc" = get_mcmc_draws(model), |
| 59 | + "optim" = get_optim_draws(model)) |
| 60 | + |
| 61 | + pi_matrix <- stan_draws %>% |
| 62 | + posterior::subset_draws(variable = "pi") %>% |
| 63 | + posterior::as_draws_df() %>% |
| 64 | + tibble::as_tibble() %>% |
| 65 | + tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw")) %>% |
| 66 | + dplyr::summarize(value = mean(.data$value), .by = "name") %>% |
| 67 | + tidyr::separate_wider_regex( |
| 68 | + cols = "name", |
| 69 | + patterns = c("pi\\[", item = "[0-9]*", ",", class = "[0-9]*", "\\]") |
| 70 | + ) %>% |
| 71 | + dplyr::mutate(item = as.integer(.data$item), |
| 72 | + class = as.integer(.data$class)) |
| 73 | + |
| 74 | + hamming <- profile_hamming( |
| 75 | + dplyr::select(measr_extract(model, "classes"), -"class") |
| 76 | + ) |
| 77 | + att_names <- hamming %>% |
| 78 | + dplyr::select(-c("profile_1", "profile_2", "hamming")) %>% |
| 79 | + colnames() |
| 80 | + |
| 81 | + item_discrim <- tidyr::crossing(item = unique(pi_matrix$item), |
| 82 | + profile_1 = unique(pi_matrix$class), |
| 83 | + profile_2 = unique(pi_matrix$class)) %>% |
| 84 | + dplyr::left_join(pi_matrix, by = c("item", "profile_1" = "class"), |
| 85 | + relationship = "many-to-one") %>% |
| 86 | + dplyr::rename("prob_1" = "value") %>% |
| 87 | + dplyr::left_join(pi_matrix, by = c("item", "profile_2" = "class"), |
| 88 | + relationship = "many-to-one") %>% |
| 89 | + dplyr::rename("prob_2" = "value") %>% |
| 90 | + dplyr::mutate(kli = (.data$prob_1 * log(.data$prob_1 / .data$prob_2)) + |
| 91 | + ((1 - .data$prob_1) * |
| 92 | + log((1 - .data$prob_1) / (1 - .data$prob_2)))) %>% |
| 93 | + dplyr::left_join(hamming, by = c("profile_1", "profile_2"), |
| 94 | + relationship = "many-to-one") %>% |
| 95 | + dplyr::mutate(dplyr::across(dplyr::where(is.logical), |
| 96 | + \(x) { |
| 97 | + dplyr::case_when( |
| 98 | + x & .data$hamming == 1L ~ TRUE, |
| 99 | + .default = NA |
| 100 | + ) |
| 101 | + }), |
| 102 | + dplyr::across(dplyr::where(is.logical), |
| 103 | + \(x) as.integer(x) * .data$kli)) %>% |
| 104 | + dplyr::filter(.data$hamming > 0) %>% |
| 105 | + dplyr::mutate(weight = 1 / .data$hamming) |
| 106 | + |
| 107 | + if (weight_prevalence) { |
| 108 | + vc <- stan_draws %>% |
| 109 | + posterior::subset_draws(variable = "log_Vc") %>% |
| 110 | + posterior::as_draws_df() %>% |
| 111 | + tibble::as_tibble() %>% |
| 112 | + tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw")) %>% |
| 113 | + dplyr::summarize(value = mean(.data$value), .by = "name") %>% |
| 114 | + dplyr::mutate(value = exp(.data$value)) %>% |
| 115 | + tidyr::separate_wider_regex( |
| 116 | + cols = "name", |
| 117 | + patterns = c("log_Vc\\[", class = "[0-9]*", "\\]") |
| 118 | + ) %>% |
| 119 | + dplyr::mutate(class = as.integer(.data$class)) |
| 120 | + |
| 121 | + item_discrim <- item_discrim %>% |
| 122 | + dplyr::left_join(vc, by = c("profile_1" = "class")) %>% |
| 123 | + dplyr::mutate(weight = .data$weight * .data$value) %>% |
| 124 | + dplyr::select(-"value") |
| 125 | + } |
| 126 | + |
| 127 | + item_discrim <- item_discrim %>% |
| 128 | + dplyr::summarize( |
| 129 | + overall = stats::weighted.mean(.data$kli, w = .data$weight), |
| 130 | + dplyr::across( |
| 131 | + dplyr::all_of(att_names), |
| 132 | + \(x) stats::weighted.mean(x, w = .data$weight, na.rm = TRUE) |
| 133 | + ), |
| 134 | + .by = "item" |
| 135 | + ) |
| 136 | + |
| 137 | + test_discrim <- item_discrim %>% |
| 138 | + dplyr::summarize(dplyr::across(-"item", sum)) |
| 139 | + |
| 140 | + return( |
| 141 | + list(item_discrimination = item_discrim, |
| 142 | + test_discrimination = test_discrim) |
| 143 | + ) |
| 144 | +} |
| 145 | + |
| 146 | +profile_hamming <- function(profiles) { |
| 147 | + profile_combos <- tidyr::crossing(profile_1 = seq_len(nrow(profiles)), |
| 148 | + profile_2 = seq_len(nrow(profiles))) |
| 149 | + |
| 150 | + |
| 151 | + hamming <- mapply(hamming_distance, profile_combos$profile_1, |
| 152 | + profile_combos$profile_2, |
| 153 | + MoreArgs = list(profiles = profiles), |
| 154 | + SIMPLIFY = FALSE, USE.NAMES = FALSE) %>% |
| 155 | + dplyr::bind_rows() |
| 156 | + |
| 157 | + dplyr::bind_cols(profile_combos, hamming) |
| 158 | +} |
| 159 | + |
| 160 | +hamming_distance <- function(prof1, prof2, profiles) { |
| 161 | + pattern1 <- profiles[prof1, ] |
| 162 | + pattern2 <- profiles[prof2, ] |
| 163 | + |
| 164 | + pattern1 %>% |
| 165 | + tidyr::pivot_longer(cols = dplyr::everything(), |
| 166 | + names_to = "att", values_to = "patt1") %>% |
| 167 | + dplyr::left_join(tidyr::pivot_longer(pattern2, cols = dplyr::everything(), |
| 168 | + names_to = "att", values_to = "patt2"), |
| 169 | + by = "att", relationship = "one-to-one") %>% |
| 170 | + dplyr::mutate(mismatch = .data$patt1 != .data$patt2, |
| 171 | + hamming = sum(.data$mismatch)) %>% |
| 172 | + dplyr::select("att", "mismatch", "hamming") %>% |
| 173 | + tidyr::pivot_wider(names_from = "att", values_from = "mismatch") |
| 174 | +} |
0 commit comments