
Commit bacc0a7

Merge pull request #50 from wjakethompson/overall-pvalues
PPMC for overall item p-values
2 parents: faaf5d0 + b18f8c5 · commit: bacc0a7

File tree: 6 files changed, +167 −21 lines
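In practice, the new check plugs into the existing PPMC interface. A minimal usage sketch follows; `my_model` is a hypothetical fitted measrdcm object, not something created in this PR:

# Sketch only: `my_model` stands in for any fitted measrdcm object.
library(measr)

ppmc_res <- fit_ppmc(my_model, model_fit = NULL, item_fit = "pvalue")

# One row per item: observed proportion correct, posterior predicted mean,
# credible-interval bounds, and the posterior predictive p-value (ppp).
ppmc_res$item_fit$pvalue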

R/extract.R

Lines changed: 6 additions & 0 deletions
@@ -77,6 +77,10 @@ measr_extract <- function(model, ...) {
 #' Model fit information must first be added to the model using [add_fit()].
 #' * `ppmc_odds_ratio_flags`: A subset of the PPMC odds ratios where the _ppp_
 #'   is outside the specified `ppmc_interval`.
+#' * `ppmc_pvalue`: The observed and posterior predicted proportion of correct
+#'   responses to each item. See [fit_ppmc()] for details.
+#' * `ppmc_pvalue_flags`: A subset of the PPMC proportion correct values where
+#'   the _ppp_ is outside the specified `ppmc_interval`.
 #' * `loo`: The leave-one-out cross validation results. See [loo::loo()] for
 #'   details. The information criterion must first be added to the model using
 #'   [add_criterion()].
@@ -146,6 +150,8 @@ measr_extract.measrdcm <- function(model, what, ...) {
    ppmc_conditional_prob_flags = dcm_extract_ppmc_cond_prob(model, ...),
    ppmc_odds_ratio = extract_or(model, ppmc_interval = NULL),
    ppmc_odds_ratio_flags = extract_or(model, ...),
+   ppmc_pvalue = dcm_extract_ppmc_pvalue(model, ppmc_interval = NULL),
+   ppmc_pvalue_flags = dcm_extract_ppmc_pvalue(model, ...),
    loo = extract_info_crit(model, "loo"),
    waic = extract_info_crit(model, "waic"),
    pattern_reliability = dcm_extract_patt_reli(model),
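Once PPMC results have been stored on a model with add_fit(), the two new keys mirror the existing odds-ratio extractors. A rough sketch of the extraction calls exercised in the tests below (`my_model` is hypothetical):

# Sketch only: assumes `my_model` already has PPMC fit information added.
measr_extract(my_model, "ppmc_pvalue")                              # all items
measr_extract(my_model, "ppmc_pvalue_flags")                        # ppp outside the default 95% interval
measr_extract(my_model, "ppmc_pvalue_flags", ppmc_interval = 0.8)   # wider flagging net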

R/ppmc.R

Lines changed: 79 additions & 10 deletions
@@ -47,9 +47,10 @@
 #' At the item level, we can calculate the conditional probability that a
 #' respondent in each class provides a correct response (`item_fit =
 #' "conditional_prob"`) as described by Thompson (2019) and Sinharay & Almond
-#' (2007). We can also calculate the odds ratio for each pair of items
-#' (`item_fit = "odds_ratio"`) as described by Park et al. (2015) and Sinharay
-#' et al. (2006).
+#' (2007) or the overall proportion correct for an item (`item_fit = "pvalue"`),
+#' as described by Thompson (2019). We can also calculate the odds ratio for
+#' each pair of items (`item_fit = "odds_ratio"`) as described by Park et al.
+#' (2015) and Sinharay et al. (2006).
 #'
 #' @return A list with two elements, "model_fit" and "item_fit". If either
 #'   `model_fit = NULL` or `item_fit = NULL` in the function call, this will be
@@ -104,7 +105,7 @@
 fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
                      return_draws = 0,
                      model_fit = c("raw_score"),
-                     item_fit = c("conditional_prob", "odds_ratio"),
+                     item_fit = c("conditional_prob", "odds_ratio", "pvalue"),
                      force = FALSE) {
   model <- check_model(model, required_class = "measrdcm", name = "object")
   total_draws <- posterior::ndraws(posterior::as_draws(model))
@@ -195,7 +196,7 @@ fit_ppmc <- function(ndraws = NULL, probs = c(0.025, 0.975),
                                      post_data = post_data,
                                      probs = probs,
                                      return_draws = return_draws,
-                                     type = model_fit)
+                                     type = check_ppmc$args$model_fit)
   } else {
     NULL
   }
@@ -213,13 +214,26 @@
                                    pi_draws = pi_draws,
                                    probs = probs,
                                    return_draws = return_draws,
-                                   type = item_fit)
+                                   type = check_ppmc$args$item_fit)
   } else {
     NULL
   }
 
-  ret_list <- list(model_fit = model_level_fit,
-                   item_fit = item_level_fit)
+  ret_list <- if (is.null(model$fit$ppmc)) {
+    list(model_fit = model_level_fit,
+         item_fit = item_level_fit)
+  } else {
+    model$fit$ppmc$model_fit[names(model_level_fit)] <- NULL
+    model$fit$ppmc$item_fit[names(item_level_fit)] <- NULL
+
+    utils::modifyList(model$fit$ppmc,
+                      list(model_fit = model_level_fit,
+                           item_fit = item_level_fit))
+  }
+
+  ret_list <- list(model_fit = ret_list$model_fit[model_fit],
+                   item_fit = ret_list$item_fit[item_fit])
+
   ret_list[sapply(ret_list, is.null)] <- NULL
 
   return(ret_list)
@@ -312,9 +326,15 @@ ppmc_item_fit <- function(model, post_data, attr, resp_prob, pi_draws, probs,
   } else {
     NULL
   }
+  pvalue <- if ("pvalue" %in% type) {
+    ppmc_item_pvalue(model = model, post_data = post_data, probs, return_draws)
+  } else {
+    NULL
+  }
 
   item_res <- list(conditional_prob = cond_prob,
-                   odds_ratio = odds_ratio)
+                   odds_ratio = odds_ratio,
+                   pvalue = pvalue)
   item_res[sapply(item_res, is.null)] <- NULL
 
   return(item_res)
@@ -420,7 +440,8 @@ ppmc_conditional_probs <- function(model, attr, resp_prob, pi_draws, probs,
                        dplyr::mutate(item_id = as.integer(.data$item)),
                      by = "item_id", relationship = "many-to-one") %>%
     dplyr::select(-"item_id", -"class_id") %>%
-    dplyr::relocate("item", "class", .before = 1)
+    dplyr::relocate("item", "class", .before = 1) %>%
+    dplyr::rename(!!model$data$item_id := "item")
 
   return(cond_pval_res)
 }
@@ -513,3 +534,51 @@ pw_or <- function(dat) {
     dplyr::mutate(or = or)
   return(pwor)
 }
+
+ppmc_item_pvalue <- function(model, post_data, probs, return_draws) {
+  obs_pvalue <- model$data$data %>%
+    dplyr::mutate(item = as.numeric(.data$item_id)) %>%
+    dplyr::summarize(obs_pvalue = mean(.data$score),
+                     .by = c("item", "item_id"))
+
+  pval_res <- post_data %>%
+    dplyr::summarize(pvalue = mean(.data$value),
+                     .by = c(".chain", ".iteration", ".draw", "item")) %>%
+    tidyr::nest(samples = -"item") %>%
+    dplyr::left_join(obs_pvalue, by = "item", relationship = "one-to-one") %>%
+    dplyr::mutate(ppmc_mean = vapply(.data$samples,
+                                     function(.x) mean(.x$pvalue),
+                                     double(1)),
+                  bounds = lapply(.data$samples,
+                                  function(.x, probs) {
+                                    tibble::as_tibble_row(
+                                      stats::quantile(.x$pvalue,
+                                                      probs = probs,
+                                                      na.rm = TRUE)
+                                    )
+                                  },
+                                  probs = probs),
+                  ppp = mapply(function(exp, obs) {
+                                 mean(exp$pvalue > obs)
+                               },
+                               .data$samples, .data$obs_pvalue)) %>%
+    tidyr::unnest("bounds")
+
+  if (return_draws > 0) {
+    pval_res <- pval_res %>%
+      dplyr::relocate("samples", .before = "ppp") %>%
+      dplyr::mutate(
+        samples = lapply(.data$samples,
+                         function(x) {
+                           x %>%
+                             dplyr::slice_sample(prop = return_draws) %>%
+                             dplyr::pull("pvalue")
+                         }))
+  } else {
+    pval_res <- dplyr::select(pval_res, -"samples")
+  }
+
+  pval_res %>%
+    dplyr::select(-"item") %>%
+    dplyr::rename(!!model$data$item_id := "item_id")
+}
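Conceptually, ppmc_item_pvalue() compares each item's observed proportion correct against the distribution of proportions correct in the posterior-predicted data sets. A toy, self-contained illustration of that comparison for a single item (not the package internals; the objects below are made up for demonstration):

# Toy illustration of the overall item p-value PPMC for one item.
obs_scores <- c(1, 0, 1, 1, 0, 1)                           # observed 0/1 responses
set.seed(1)
rep_scores <- matrix(rbinom(100 * 6, 1, 0.6), nrow = 100)   # 100 posterior-predicted data sets

obs_pvalue <- mean(obs_scores)                   # observed proportion correct
rep_pvalue <- rowMeans(rep_scores)               # proportion correct in each predicted data set
ppp <- mean(rep_pvalue > obs_pvalue)             # posterior predictive p-value
quantile(rep_pvalue, probs = c(0.025, 0.975))    # interval reported alongside ppp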

R/utils-extract.R

Lines changed: 25 additions & 0 deletions
@@ -187,6 +187,31 @@ dcm_extract_ppmc_cond_prob <- function(model, ppmc_interval = 0.95) {
   return(res)
 }
 
+dcm_extract_ppmc_pvalue <- function(model, ppmc_interval = 0.95) {
+  if (!is.null(ppmc_interval)) {
+    ppmc_interval <- check_double(ppmc_interval, lb = 0, ub = 1,
+                                  name = "ppmc_interval")
+  }
+
+  if (is.null(model$fit$ppmc$item_fit$pvalue)) {
+    rlang::abort(message = glue::glue("Model fit information must be ",
+                                      "added to a model object before ",
+                                      "p-values can be ",
+                                      "extracted. See `?add_fit()`."))
+  }
+
+  res <- if (is.null(ppmc_interval)) {
+    model$fit$ppmc$item_fit$pvalue
+  } else {
+    model$fit$ppmc$item_fit$pvalue %>%
+      dplyr::filter(!dplyr::between(.data$ppp,
+                                    (1 - ppmc_interval) / 2,
+                                    1 - ((1 - ppmc_interval) / 2)))
+  }
+
+  return(res)
+}
+
 dcm_extract_patt_reli <- function(model) {
   if (identical(model$reliability, list())) {
     rlang::abort(message = glue::glue("Reliability information must be ",

man/fit_ppmc.Rd

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default.

man/measr_extract.Rd

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default.

tests/testthat/test-mcmc.R

Lines changed: 48 additions & 7 deletions
@@ -22,7 +22,7 @@ if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
                          attribute_structure = "independent",
                          method = "mcmc", seed = 63277, backend = "rstan",
                          iter = 1500, warmup = 1000, chains = 2,
-                         cores = 2,
+                         cores = 2, refresh = 0,
                          prior = c(prior(beta(5, 17), class = "slip"),
                                    prior(beta(5, 17), class = "guess")))
   )
@@ -202,9 +202,9 @@ test_that("ppmc works", {
 
   test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 200, return_draws = 0.9,
                         probs = c(0.055, 0.945),
-                        model_fit = NULL, item_fit = "odds_ratio")
+                        model_fit = NULL, item_fit = c("odds_ratio", "pvalue"))
   expect_equal(names(test_ppmc), c("item_fit"))
-  expect_equal(names(test_ppmc$item_fit), "odds_ratio")
+  expect_equal(names(test_ppmc$item_fit), c("odds_ratio", "pvalue"))
   expect_s3_class(test_ppmc$item_fit$odds_ratio, "tbl_df")
   expect_equal(nrow(test_ppmc$item_fit$odds_ratio), 6L)
   expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
@@ -218,20 +218,35 @@ test_that("ppmc works", {
                       length, integer(1)),
                rep(180, 6))
 
+  expect_s3_class(test_ppmc$item_fit$pvalue, "tbl_df")
+  expect_equal(nrow(test_ppmc$item_fit$pvalue), 4)
+  expect_equal(colnames(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "5.5%", "94.5%",
+                 "samples", "ppp"))
+  expect_equal(as.character(test_ppmc$item_fit$pvalue$item),
+               paste0("mdm", 1:4))
+  expect_equal(vapply(test_ppmc$item_fit$pvalue$samples,
+                      length, double(1)),
+               rep(180, 4))
+
   test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 1, return_draws = 0,
                         model_fit = "raw_score",
-                        item_fit = c("conditional_prob", "odds_ratio"))
+                        item_fit = c("conditional_prob", "odds_ratio",
+                                     "pvalue"))
   expect_equal(names(test_ppmc), c("model_fit", "item_fit"))
   expect_equal(names(test_ppmc$model_fit), "raw_score")
   expect_equal(colnames(test_ppmc$model_fit$raw_score),
                c("obs_chisq", "ppmc_mean", "2.5%", "97.5%", "ppp"))
-  expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "odds_ratio"))
+  expect_equal(names(test_ppmc$item_fit),
+               c("conditional_prob", "odds_ratio", "pvalue"))
   expect_equal(colnames(test_ppmc$item_fit$conditional_prob),
                c("item", "class", "obs_cond_pval", "ppmc_mean", "2.5%", "97.5%",
                  "ppp"))
   expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
                c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
                  "ppp"))
+  expect_equal(colnames(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))
 })
 
 test_that("ppmc extraction errors", {
@@ -302,23 +317,39 @@ test_that("model fit can be added", {
                c("item", "class", "obs_cond_pval", "ppmc_mean", "5.5%", "94.5%",
                  "ppp"))
 
+  # now calculate conditional probs and overall pvalue - overall is new, but
+  # conditional prob should use stored value
+  test_ppmc <- fit_ppmc(test_model, model_fit = NULL,
+                        item_fit = c("conditional_prob", "pvalue"))
+  expect_equal(names(test_ppmc), "item_fit")
+  expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "pvalue"))
+  expect_identical(test_model$fit$ppmc$item_fit$conditional_prob,
+                   test_ppmc$item_fit$conditional_prob)
+  expect_equal(names(test_ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))
+
   # overwrite just conditional prob with samples and new probs
+  # add overall p-values
   test_model <- add_fit(test_model, method = "ppmc", overwrite = TRUE,
-                        model_fit = NULL, item_fit = "conditional_prob",
+                        model_fit = NULL,
+                        item_fit = c("conditional_prob", "pvalue"),
                         return_draws = 0.2, probs = c(.1, .9))
   expect_equal(names(test_model$fit), c("m2", "ppmc"))
   expect_equal(names(test_model$fit$ppmc), c("item_fit", "model_fit"))
   expect_equal(names(test_model$fit$ppmc$model_fit), "raw_score")
   expect_equal(names(test_model$fit$ppmc$model_fit$raw_score),
                c("obs_chisq", "ppmc_mean", "5.5%", "94.5%", "ppp"))
   expect_equal(names(test_model$fit$ppmc$item_fit),
-               c("odds_ratio", "conditional_prob"))
+               c("odds_ratio", "conditional_prob", "pvalue"))
   expect_equal(names(test_model$fit$ppmc$item_fit$odds_ratio),
                c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
                  "samples", "ppp"))
   expect_equal(names(test_model$fit$ppmc$item_fit$conditional_prob),
                c("item", "class", "obs_cond_pval", "ppmc_mean", "10%", "90%",
                  "samples", "ppp"))
+  expect_equal(names(test_model$fit$ppmc$item_fit$pvalue),
+               c("item", "obs_pvalue", "ppmc_mean", "10%", "90%",
+                 "samples", "ppp"))
 
   # test extraction
   rs_check <- measr_extract(test_model, "ppmc_raw_score")
@@ -343,6 +374,16 @@ test_that("model fit can be added", {
   expect_equal(measr_extract(test_model, "ppmc_odds_ratio_flags",
                              ppmc_interval = 0.8),
                dplyr::filter(or_check, ppp <= 0.1 | ppp >= 0.9))
+
+  pval_check <- measr_extract(test_model, "ppmc_pvalue")
+  expect_equal(pval_check,
+               test_model$fit$ppmc$item_fit$pvalue)
+  expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
+                             ppmc_interval = 0.95),
+               dplyr::filter(pval_check, ppp <= 0.025 | ppp >= 0.975))
+  expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
+                             ppmc_interval = 0.6),
+               dplyr::filter(pval_check, ppp <= 0.2 | ppp >= 0.8))
 })
 
 test_that("respondent probabilities are correct", {
