
Commit bacc0a7

Merge pull request #50 from wjakethompson/overall-pvalues
PPMC for overall item p-values
2 parents: faaf5d0 + b18f8c5 · commit: bacc0a7

File tree: 6 files changed, +167 −21 lines
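In practice, the new check plugs into the existing PPMC interface. A minimal usage sketch follows; `my_model` is a hypothetical fitted measrdcm object, not something created in this PR:

# Sketch only: `my_model` stands in for any fitted measrdcm object.
library(measr)

ppmc_res <- fit_ppmc(my_model, model_fit = NULL, item_fit = "pvalue")

# One row per item: observed proportion correct, posterior predicted mean,
# credible-interval bounds, and the posterior predictive p-value (ppp).
ppmc_res$item_fit$pvalue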

R/extract.R

Lines changed: 6 additions & 0 deletions
@@ -77,6 +77,10 @@ measr_extract <- function(model, ...) {
 #' Model fit information must first be added to the model using [add_fit()].
 #' * `ppmc_odds_ratio_flags`: A subset of the PPMC odds ratios where the _ppp_
 #'   is outside the specified `ppmc_interval`.
+#' * `ppmc_pvalue`: The observed and posterior predicted proportion of correct
+#'   responses to each item. See [fit_ppmc()] for details.
+#' * `ppmc_pvalue_flags`: A subset of the PPMC proportion correct values where
+#'   the _ppp_ is outside the specified `ppmc_interval`.
 #' * `loo`: The leave-one-out cross validation results. See [loo::loo()] for
 #'   details. The information criterion must first be added to the model using
 #'   [add_criterion()].
@@ -146,6 +150,8 @@ measr_extract.measrdcm <- function(model, what, ...) {
    ppmc_conditional_prob_flags = dcm_extract_ppmc_cond_prob(model, ...),
    ppmc_odds_ratio = extract_or(model, ppmc_interval = NULL),
    ppmc_odds_ratio_flags = extract_or(model, ...),
+   ppmc_pvalue = dcm_extract_ppmc_pvalue(model, ppmc_interval = NULL),
+   ppmc_pvalue_flags = dcm_extract_ppmc_pvalue(model, ...),
    loo = extract_info_crit(model, "loo"),
    waic = extract_info_crit(model, "waic"),
    pattern_reliability = dcm_extract_patt_reli(model),
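Once PPMC results have been stored on a model with add_fit(), the two new keys mirror the existing odds-ratio extractors. A rough sketch of the extraction calls exercised in the tests below (`my_model` is hypothetical):

# Sketch only: assumes `my_model` already has PPMC fit information added.
measr_extract(my_model, "ppmc_pvalue")                              # all items
measr_extract(my_model, "ppmc_pvalue_flags")                        # ppp outside the default 95% interval
measr_extract(my_model, "ppmc_pvalue_flags", ppmc_interval = 0.8)   # wider flagging net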

R/ppmc.R

Lines changed: 79 additions & 10 deletions
@@ -47,9 +47,10 @@
 #' At the item level, we can calculate the conditional probability that a
 #' respondent in each class provides a correct response (`item_fit =
 #' "conditional_prob"`) as described by Thompson (2019) and Sinharay & Almond
-#' (2007). We can also calculate the odds ratio for each pair of items
-#' (`item_fit = "odds_ratio"`) as described by Park et al. (2015) and Sinharay
-#' et al. (2006).
+#' (2007) or the overall proportion correct for an item (`item_fit = "pvalue"`),
+#' as described by Thompson (2019). We can also calculate the odds ratio for
+#' each pair of items (`item_fit = "odds_ratio"`) as described by Park et al.
+#' (2015) and Sinharay et al. (2006).
 #'
 #' @return A list with two elements, "model_fit" and "item_fit". If either
 #'   `model_fit = NULL` or `item_fit = NULL` in the function call, this will be
@@ -104,7 +105,7 @@
 fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
                      return_draws = 0,
                      model_fit = c("raw_score"),
-                     item_fit = c("conditional_prob", "odds_ratio"),
+                     item_fit = c("conditional_prob", "odds_ratio", "pvalue"),
                      force = FALSE) {
   model <- check_model(model, required_class = "measrdcm", name = "object")
   total_draws <- posterior::ndraws(posterior::as_draws(model))
@@ -195,7 +196,7 @@ fit_ppmc <- function(ndraws = NULL, probs = c(0.025, 0.975),
                                      post_data = post_data,
                                      probs = probs,
                                      return_draws = return_draws,
-                                     type = model_fit)
+                                     type = check_ppmc$args$model_fit)
   } else {
     NULL
   }
@@ -213,13 +214,26 @@
                                    pi_draws = pi_draws,
                                    probs = probs,
                                    return_draws = return_draws,
-                                   type = item_fit)
+                                   type = check_ppmc$args$item_fit)
   } else {
     NULL
   }
 
-  ret_list <- list(model_fit = model_level_fit,
-                   item_fit = item_level_fit)
+  ret_list <- if (is.null(model$fit$ppmc)) {
+    list(model_fit = model_level_fit,
+         item_fit = item_level_fit)
+  } else {
+    model$fit$ppmc$model_fit[names(model_level_fit)] <- NULL
+    model$fit$ppmc$item_fit[names(item_level_fit)] <- NULL
+
+    utils::modifyList(model$fit$ppmc,
+                      list(model_fit = model_level_fit,
+                           item_fit = item_level_fit))
+  }
+
+  ret_list <- list(model_fit = ret_list$model_fit[model_fit],
+                   item_fit = ret_list$item_fit[item_fit])
+
   ret_list[sapply(ret_list, is.null)] <- NULL
 
   return(ret_list)
@@ -312,9 +326,15 @@ ppmc_item_fit <- function(model, post_data, attr, resp_prob, pi_draws, probs,
   } else {
     NULL
   }
+  pvalue <- if ("pvalue" %in% type) {
+    ppmc_item_pvalue(model = model, post_data = post_data, probs, return_draws)
+  } else {
+    NULL
+  }
 
   item_res <- list(conditional_prob = cond_prob,
-                   odds_ratio = odds_ratio)
+                   odds_ratio = odds_ratio,
+                   pvalue = pvalue)
   item_res[sapply(item_res, is.null)] <- NULL
 
   return(item_res)
@@ -420,7 +440,8 @@ ppmc_conditional_probs <- function(model, attr, resp_prob, pi_draws, probs,
                        dplyr::mutate(item_id = as.integer(.data$item)),
                      by = "item_id", relationship = "many-to-one") %>%
     dplyr::select(-"item_id", -"class_id") %>%
-    dplyr::relocate("item", "class", .before = 1)
+    dplyr::relocate("item", "class", .before = 1) %>%
+    dplyr::rename(!!model$data$item_id := "item")
 
   return(cond_pval_res)
 }
@@ -513,3 +534,51 @@ pw_or <- function(dat) {
     dplyr::mutate(or = or)
   return(pwor)
 }
+
+ppmc_item_pvalue <- function(model, post_data, probs, return_draws) {
+  obs_pvalue <- model$data$data %>%
+    dplyr::mutate(item = as.numeric(.data$item_id)) %>%
+    dplyr::summarize(obs_pvalue = mean(.data$score),
+                     .by = c("item", "item_id"))
+
+  pval_res <- post_data %>%
+    dplyr::summarize(pvalue = mean(.data$value),
+                     .by = c(".chain", ".iteration", ".draw", "item")) %>%
+    tidyr::nest(samples = -"item") %>%
+    dplyr::left_join(obs_pvalue, by = "item", relationship = "one-to-one") %>%
+    dplyr::mutate(ppmc_mean = vapply(.data$samples,
+                                     function(.x) mean(.x$pvalue),
+                                     double(1)),
+                  bounds = lapply(.data$samples,
+                                  function(.x, probs) {
+                                    tibble::as_tibble_row(
+                                      stats::quantile(.x$pvalue,
+                                                      probs = probs,
+                                                      na.rm = TRUE)
+                                    )
+                                  },
+                                  probs = probs),
+                  ppp = mapply(function(exp, obs) {
+                                 mean(exp$pvalue > obs)
+                               },
+                               .data$samples, .data$obs_pvalue)) %>%
+    tidyr::unnest("bounds")
+
+  if (return_draws > 0) {
+    pval_res <- pval_res %>%
+      dplyr::relocate("samples", .before = "ppp") %>%
+      dplyr::mutate(
+        samples = lapply(.data$samples,
+                         function(x) {
+                           x %>%
+                             dplyr::slice_sample(prop = return_draws) %>%
+                             dplyr::pull("pvalue")
+                         }))
+  } else {
+    pval_res <- dplyr::select(pval_res, -"samples")
+  }
+
+  pval_res %>%
+    dplyr::select(-"item") %>%
+    dplyr::rename(!!model$data$item_id := "item_id")
+}
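Conceptually, ppmc_item_pvalue() compares each item's observed proportion correct against the distribution of proportions correct in the posterior-predicted data sets. A toy, self-contained illustration of that comparison for a single item (not the package internals; the objects below are made up for demonstration):

# Toy illustration of the overall item p-value PPMC for one item.
obs_scores <- c(1, 0, 1, 1, 0, 1)                           # observed 0/1 responses
set.seed(1)
rep_scores <- matrix(rbinom(100 * 6, 1, 0.6), nrow = 100)   # 100 posterior-predicted data sets

obs_pvalue <- mean(obs_scores)                   # observed proportion correct
rep_pvalue <- rowMeans(rep_scores)               # proportion correct in each predicted data set
ppp <- mean(rep_pvalue > obs_pvalue)             # posterior predictive p-value
quantile(rep_pvalue, probs = c(0.025, 0.975))    # interval reported alongside ppp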

R/utils-extract.R

Lines changed: 25 additions & 0 deletions
@@ -187,6 +187,31 @@ dcm_extract_ppmc_cond_prob <- function(model, ppmc_interval = 0.95) {
   return(res)
 }
 
+dcm_extract_ppmc_pvalue <- function(model, ppmc_interval = 0.95) {
+  if (!is.null(ppmc_interval)) {
+    ppmc_interval <- check_double(ppmc_interval, lb = 0, ub = 1,
+                                  name = "ppmc_interval")
+  }
+
+  if (is.null(model$fit$ppmc$item_fit$pvalue)) {
+    rlang::abort(message = glue::glue("Model fit information must be ",
+                                      "added to a model object before ",
+                                      "p-values can be ",
+                                      "extracted. See `?add_fit()`."))
+  }
+
+  res <- if (is.null(ppmc_interval)) {
+    model$fit$ppmc$item_fit$pvalue
+  } else {
+    model$fit$ppmc$item_fit$pvalue %>%
+      dplyr::filter(!dplyr::between(.data$ppp,
+                                    (1 - ppmc_interval) / 2,
+                                    1 - ((1 - ppmc_interval) / 2)))
+  }
+
+  return(res)
+}
+
 dcm_extract_patt_reli <- function(model) {
   if (identical(model$reliability, list())) {
     rlang::abort(message = glue::glue("Reliability information must be ",

man/fit_ppmc.Rd

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default.

man/measr_extract.Rd

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default.

tests/testthat/test-mcmc.R

Lines changed: 48 additions & 7 deletions
@@ -22,7 +22,7 @@ if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
                          attribute_structure = "independent",
                          method = "mcmc", seed = 63277, backend = "rstan",
                          iter = 1500, warmup = 1000, chains = 2,
-                         cores = 2,
+                         cores = 2, refresh = 0,
                          prior = c(prior(beta(5, 17), class = "slip"),
                                    prior(beta(5, 17), class = "guess")))
   )
@@ -202,9 +202,9 @@ test_that("ppmc works", {
 
   test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 200, return_draws = 0.9,
                         probs = c(0.055, 0.945),
-                        model_fit = NULL, item_fit = "odds_ratio")
+                        model_fit = NULL, item_fit = c("odds_ratio", "pvalue"))
   expect_equal(names(test_ppmc), c("item_fit"))
-  expect_equal(names(test_ppmc$item_fit), "odds_ratio")
+  expect_equal(names(test_ppmc$item_fit), c("odds_ratio", "pvalue"))
   expect_s3_class(test_ppmc$item_fit$odds_ratio, "tbl_df")
   expect_equal(nrow(test_ppmc$item_fit$odds_ratio), 6L)
   expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
@@ -218,20 +218,35 @@ test_that("ppmc works", {
                       length, integer(1)),
                rep(180, 6))
 
+  expect_s3_class(test_ppmc$item_fit$pvalue, "tbl_df")
+  expect_equal(nrow(test_ppmc$item_fit$pvalue), 4)
+  expect_equal(colnames(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "5.5%", "94.5%",
+                 "samples", "ppp"))
+  expect_equal(as.character(test_ppmc$item_fit$pvalue$item),
+               paste0("mdm", 1:4))
+  expect_equal(vapply(test_ppmc$item_fit$pvalue$samples,
+                      length, double(1)),
+               rep(180, 4))
+
   test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 1, return_draws = 0,
                         model_fit = "raw_score",
-                        item_fit = c("conditional_prob", "odds_ratio"))
+                        item_fit = c("conditional_prob", "odds_ratio",
+                                     "pvalue"))
   expect_equal(names(test_ppmc), c("model_fit", "item_fit"))
   expect_equal(names(test_ppmc$model_fit), "raw_score")
   expect_equal(colnames(test_ppmc$model_fit$raw_score),
                c("obs_chisq", "ppmc_mean", "2.5%", "97.5%", "ppp"))
-  expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "odds_ratio"))
+  expect_equal(names(test_ppmc$item_fit),
+               c("conditional_prob", "odds_ratio", "pvalue"))
   expect_equal(colnames(test_ppmc$item_fit$conditional_prob),
                c("item", "class", "obs_cond_pval", "ppmc_mean", "2.5%", "97.5%",
                  "ppp"))
   expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
                c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
                  "ppp"))
+  expect_equal(colnames(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))
 })
 
 test_that("ppmc extraction errors", {
@@ -302,23 +317,39 @@ test_that("model fit can be added", {
                c("item", "class", "obs_cond_pval", "ppmc_mean", "5.5%", "94.5%",
                  "ppp"))
 
+  # now calculate conditional probs and overall pvalue - overall is new, but
+  # conditional prob should use stored value
+  test_ppmc <- fit_ppmc(test_model, model_fit = NULL,
+                        item_fit = c("conditional_prob", "pvalue"))
+  expect_equal(names(test_ppmc), "item_fit")
+  expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "pvalue"))
+  expect_identical(test_model$fit$ppmc$item_fit$conditional_prob,
+                   test_ppmc$item_fit$conditional_prob)
+  expect_equal(names(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))
+
   # overwrite just conditional prob with samples and new probs
+  # add overall p-values
   test_model <- add_fit(test_model, method = "ppmc", overwrite = TRUE,
-                        model_fit = NULL, item_fit = "conditional_prob",
+                        model_fit = NULL,
+                        item_fit = c("conditional_prob", "pvalue"),
                         return_draws = 0.2, probs = c(.1, .9))
   expect_equal(names(test_model$fit), c("m2", "ppmc"))
   expect_equal(names(test_model$fit$ppmc), c("item_fit", "model_fit"))
   expect_equal(names(test_model$fit$ppmc$model_fit), "raw_score")
   expect_equal(names(test_model$fit$ppmc$model_fit$raw_score),
                c("obs_chisq", "ppmc_mean", "5.5%", "94.5%", "ppp"))
   expect_equal(names(test_model$fit$ppmc$item_fit),
-               c("odds_ratio", "conditional_prob"))
+               c("odds_ratio", "conditional_prob", "pvalue"))
   expect_equal(names(test_model$fit$ppmc$item_fit$odds_ratio),
                c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
                  "samples", "ppp"))
   expect_equal(names(test_model$fit$ppmc$item_fit$conditional_prob),
                c("item", "class", "obs_cond_pval", "ppmc_mean", "10%", "90%",
                  "samples", "ppp"))
+  expect_equal(names(test_model$fit$ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "10%", "90%",
+                 "samples", "ppp"))
 
   # test extraction
   rs_check <- measr_extract(test_model, "ppmc_raw_score")
@@ -343,6 +374,16 @@ test_that("model fit can be added", {
   expect_equal(measr_extract(test_model, "ppmc_odds_ratio_flags",
                              ppmc_interval = 0.8),
                dplyr::filter(or_check, ppp <= 0.1 | ppp >= 0.9))
+
+  pval_check <- measr_extract(test_model, "ppmc_pvalue")
+  expect_equal(pval_check,
+               test_model$fit$ppmc$item_fit$pvalue)
+  expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
+                             ppmc_interval = 0.95),
+               dplyr::filter(pval_check, ppp <= 0.025 | ppp >= 0.975))
+  expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
+                             ppmc_interval = 0.6),
+               dplyr::filter(pval_check, ppp <= 0.2 | ppp >= 0.8))
 })
 
 test_that("respondent probabilities are correct", {
