PPMC for overall item p-values #50

Merged
merged 3 commits on Aug 12, 2024
6 changes: 6 additions & 0 deletions R/extract.R
@@ -77,6 +77,10 @@ measr_extract <- function(model, ...) {
#' Model fit information must first be added to the model using [add_fit()].
#' * `ppmc_odds_ratio_flags`: A subset of the PPMC odds ratios where the _ppp_
#' is outside the specified `ppmc_interval`.
#' * `ppmc_pvalue`: The observed and posterior predicted proportion of correct
#' responses to each item. See [fit_ppmc()] for details.
#' * `ppmc_pvalue_flags`: A subset of the PPMC proportion correct values where
#' the _ppp_ is outside the specified `ppmc_interval`.
#' * `loo`: The leave-one-out cross validation results. See [loo::loo()] for
#' details. The information criterion must first be added to the model using
#' [add_criterion()].
@@ -146,6 +150,8 @@ measr_extract.measrdcm <- function(model, what, ...) {
ppmc_conditional_prob_flags = dcm_extract_ppmc_cond_prob(model, ...),
ppmc_odds_ratio = extract_or(model, ppmc_interval = NULL),
ppmc_odds_ratio_flags = extract_or(model, ...),
ppmc_pvalue = dcm_extract_ppmc_pvalue(model, ppmc_interval = NULL),
ppmc_pvalue_flags = dcm_extract_ppmc_pvalue(model, ...),
loo = extract_info_crit(model, "loo"),
waic = extract_info_crit(model, "waic"),
pattern_reliability = dcm_extract_patt_reli(model),
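For orientation, a minimal usage sketch of the new extraction options (not part of the diff; `mdl` is an illustrative, assumed fitted measrdcm object):

# Sketch only: assumes `mdl` is a fitted measrdcm object.
# PPMC p-value results must be added before they can be extracted.
mdl <- add_fit(mdl, method = "ppmc", item_fit = "pvalue")

# All items, with observed and posterior-predicted proportions correct:
measr_extract(mdl, "ppmc_pvalue")

# Only items whose ppp falls outside the central 80% interval:
measr_extract(mdl, "ppmc_pvalue_flags", ppmc_interval = 0.8)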
89 changes: 79 additions & 10 deletions R/ppmc.R
@@ -47,9 +47,10 @@
#' At the item level, we can calculate the conditional probability that a
#' respondent in each class provides a correct response (`item_fit =
#' "conditional_prob"`) as described by Thompson (2019) and Sinharay & Almond
#' (2007). We can also calculate the odds ratio for each pair of items
#' (`item_fit = "odds_ratio"`) as described by Park et al. (2015) and Sinharay
#' et al. (2006).
#' (2007) or the overall proportion correct for an item (`item_fit = "pvalue"`),
#' as described by Thompson (2019). We can also calculate the odds ratio for
#' each pair of items (`item_fit = "odds_ratio"`) as described by Park et al.
#' (2015) and Sinharay et al. (2006).
#'
#' @return A list with two elements, "model_fit" and "item_fit". If either
#' `model_fit = NULL` or `item_fit = NULL` in the function call, this will be
@@ -104,7 +105,7 @@
fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
return_draws = 0,
model_fit = c("raw_score"),
item_fit = c("conditional_prob", "odds_ratio"),
item_fit = c("conditional_prob", "odds_ratio", "pvalue"),
force = FALSE) {
model <- check_model(model, required_class = "measrdcm", name = "object")
total_draws <- posterior::ndraws(posterior::as_draws(model))
@@ -195,7 +196,7 @@ fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
post_data = post_data,
probs = probs,
return_draws = return_draws,
type = model_fit)
type = check_ppmc$args$model_fit)
} else {
NULL
}
@@ -213,13 +214,26 @@ fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
pi_draws = pi_draws,
probs = probs,
return_draws = return_draws,
type = item_fit)
type = check_ppmc$args$item_fit)
} else {
NULL
}

ret_list <- list(model_fit = model_level_fit,
item_fit = item_level_fit)
ret_list <- if (is.null(model$fit$ppmc)) {
list(model_fit = model_level_fit,
item_fit = item_level_fit)
} else {
model$fit$ppmc$model_fit[names(model_level_fit)] <- NULL
model$fit$ppmc$item_fit[names(item_level_fit)] <- NULL

utils::modifyList(model$fit$ppmc,
list(model_fit = model_level_fit,
item_fit = item_level_fit))
}

ret_list <- list(model_fit = ret_list$model_fit[model_fit],
item_fit = ret_list$item_fit[item_fit])

ret_list[sapply(ret_list, is.null)] <- NULL

return(ret_list)
@@ -312,9 +326,15 @@ ppmc_item_fit <- function(model, post_data, attr, resp_prob, pi_draws, probs,
} else {
NULL
}
pvalue <- if ("pvalue" %in% type) {
ppmc_item_pvalue(model = model, post_data = post_data, probs, return_draws)
} else {
NULL
}

item_res <- list(conditional_prob = cond_prob,
odds_ratio = odds_ratio)
odds_ratio = odds_ratio,
pvalue = pvalue)
item_res[sapply(item_res, is.null)] <- NULL

return(item_res)
@@ -420,7 +440,8 @@ ppmc_conditional_probs <- function(model, attr, resp_prob, pi_draws, probs,
dplyr::mutate(item_id = as.integer(.data$item)),
by = "item_id", relationship = "many-to-one") %>%
dplyr::select(-"item_id", -"class_id") %>%
dplyr::relocate("item", "class", .before = 1)
dplyr::relocate("item", "class", .before = 1) %>%
dplyr::rename(!!model$data$item_id := "item")

return(cond_pval_res)
}
@@ -513,3 +534,51 @@ pw_or <- function(dat) {
dplyr::mutate(or = or)
return(pwor)
}

ppmc_item_pvalue <- function(model, post_data, probs, return_draws) {
obs_pvalue <- model$data$data %>%
dplyr::mutate(item = as.numeric(.data$item_id)) %>%
dplyr::summarize(obs_pvalue = mean(.data$score),
.by = c("item", "item_id"))

pval_res <- post_data %>%
dplyr::summarize(pvalue = mean(.data$value),
.by = c(".chain", ".iteration", ".draw", "item")) %>%
tidyr::nest(samples = -"item") %>%
dplyr::left_join(obs_pvalue, by = "item", relationship = "one-to-one") %>%
dplyr::mutate(ppmc_mean = vapply(.data$samples,
function(.x) mean(.x$pvalue),
double(1)),
bounds = lapply(.data$samples,
function(.x, probs) {
tibble::as_tibble_row(
stats::quantile(.x$pvalue,
probs = probs,
na.rm = TRUE)
)
},
probs = probs),
ppp = mapply(function(exp, obs) {
mean(exp$pvalue > obs)
},
.data$samples, .data$obs_pvalue)) %>%
tidyr::unnest("bounds")

if (return_draws > 0) {
pval_res <- pval_res %>%
dplyr::relocate("samples", .before = "ppp") %>%
dplyr::mutate(
samples = lapply(.data$samples,
function(x) {
x %>%
dplyr::slice_sample(prop = return_draws) %>%
dplyr::pull("pvalue")
}))
} else {
pval_res <- dplyr::select(pval_res, -"samples")
}

pval_res %>%
dplyr::select(-"item") %>%
dplyr::rename(!!model$data$item_id := "item_id")
}
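
To illustrate the ppp computed in ppmc_item_pvalue above: for each item, ppp is the proportion of posterior-predicted p-values that exceed the observed proportion correct, so values near 0 or 1 signal misfit. A toy sketch with made-up numbers, plus a hedged call assuming a fitted model `mdl`:

# Made-up posterior-predicted proportions correct for one item.
predicted <- c(0.52, 0.55, 0.49, 0.61, 0.58)
observed <- 0.54
mean(predicted > observed)  # ppp = 0.6, no evidence of misfit

# Requesting only the new item-level check (assumes `mdl` is fitted):
test_ppmc <- fit_ppmc(mdl, ndraws = 500, model_fit = NULL, item_fit = "pvalue")
test_ppmc$item_fit$pvalue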
25 changes: 25 additions & 0 deletions R/utils-extract.R
@@ -187,6 +187,31 @@ dcm_extract_ppmc_cond_prob <- function(model, ppmc_interval = 0.95) {
return(res)
}

dcm_extract_ppmc_pvalue <- function(model, ppmc_interval = 0.95) {
if (!is.null(ppmc_interval)) {
ppmc_interval <- check_double(ppmc_interval, lb = 0, ub = 1,
name = "ppmc_interval")
}

if (is.null(model$fit$ppmc$item_fit$pvalue)) {
rlang::abort(message = glue::glue("Model fit information must be ",
"added to a model object before ",
"p-values can be ",
"extracted. See `?add_fit()`."))
}

res <- if (is.null(ppmc_interval)) {
model$fit$ppmc$item_fit$pvalue
} else {
model$fit$ppmc$item_fit$pvalue %>%
dplyr::filter(!dplyr::between(.data$ppp,
(1 - ppmc_interval) / 2,
1 - ((1 - ppmc_interval) / 2)))
}

return(res)
}

dcm_extract_patt_reli <- function(model) {
if (identical(model$reliability, list())) {
rlang::abort(message = glue::glue("Reliability information must be ",
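The flagging rule in dcm_extract_ppmc_pvalue is two-sided: with ppmc_interval = 0.95, an item is flagged when its ppp falls outside [0.025, 0.975]. A self-contained sketch of the same filter on made-up values (not package code):

library(dplyr)
library(tibble)

pvals <- tribble(
  ~item,   ~ppp,
  "item1", 0.50,  # kept: inside the central 95% interval
  "item2", 0.01,  # flagged: below (1 - 0.95) / 2 = 0.025
  "item3", 0.99   # flagged: above 1 - 0.025 = 0.975
)

ppmc_interval <- 0.95
pvals %>%
  filter(!between(ppp, (1 - ppmc_interval) / 2,
                  1 - ((1 - ppmc_interval) / 2)))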
9 changes: 5 additions & 4 deletions man/fit_ppmc.Rd

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions man/measr_extract.Rd

Some generated files are not rendered by default.

55 changes: 48 additions & 7 deletions tests/testthat/test-mcmc.R
@@ -22,7 +22,7 @@ if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
attribute_structure = "independent",
method = "mcmc", seed = 63277, backend = "rstan",
iter = 1500, warmup = 1000, chains = 2,
cores = 2,
cores = 2, refresh = 0,
prior = c(prior(beta(5, 17), class = "slip"),
prior(beta(5, 17), class = "guess")))
)
@@ -202,9 +202,9 @@ test_that("ppmc works", {

test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 200, return_draws = 0.9,
probs = c(0.055, 0.945),
model_fit = NULL, item_fit = "odds_ratio")
model_fit = NULL, item_fit = c("odds_ratio", "pvalue"))
expect_equal(names(test_ppmc), c("item_fit"))
expect_equal(names(test_ppmc$item_fit), "odds_ratio")
expect_equal(names(test_ppmc$item_fit), c("odds_ratio", "pvalue"))
expect_s3_class(test_ppmc$item_fit$odds_ratio, "tbl_df")
expect_equal(nrow(test_ppmc$item_fit$odds_ratio), 6L)
expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
@@ -218,20 +218,35 @@
length, integer(1)),
rep(180, 6))

expect_s3_class(test_ppmc$item_fit$pvalue, "tbl_df")
expect_equal(nrow(test_ppmc$item_fit$pvalue), 4)
expect_equal(colnames(test_ppmc$item_fit$pvalue),
c("item", "obs_pvalue", "ppmc_mean", "5.5%", "94.5%",
"samples", "ppp"))
expect_equal(as.character(test_ppmc$item_fit$pvalue$item),
paste0("mdm", 1:4))
expect_equal(vapply(test_ppmc$item_fit$pvalue$samples,
length, double(1)),
rep(180, 4))

test_ppmc <- fit_ppmc(cmds_mdm_lcdm, ndraws = 1, return_draws = 0,
model_fit = "raw_score",
item_fit = c("conditional_prob", "odds_ratio"))
item_fit = c("conditional_prob", "odds_ratio",
"pvalue"))
expect_equal(names(test_ppmc), c("model_fit", "item_fit"))
expect_equal(names(test_ppmc$model_fit), "raw_score")
expect_equal(colnames(test_ppmc$model_fit$raw_score),
c("obs_chisq", "ppmc_mean", "2.5%", "97.5%", "ppp"))
expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "odds_ratio"))
expect_equal(names(test_ppmc$item_fit),
c("conditional_prob", "odds_ratio", "pvalue"))
expect_equal(colnames(test_ppmc$item_fit$conditional_prob),
c("item", "class", "obs_cond_pval", "ppmc_mean", "2.5%", "97.5%",
"ppp"))
expect_equal(colnames(test_ppmc$item_fit$odds_ratio),
c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
"ppp"))
expect_equal(colnames(test_ppmc$item_fit$pvalue),
c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))
})

test_that("ppmc extraction errors", {
@@ -302,23 +317,39 @@ test_that("model fit can be added", {
c("item", "class", "obs_cond_pval", "ppmc_mean", "5.5%", "94.5%",
"ppp"))

# now calculate conditional probs and overall pvalue - overall is new, but
# conditional prob should use stored value
test_ppmc <- fit_ppmc(test_model, model_fit = NULL,
item_fit = c("conditional_prob", "pvalue"))
expect_equal(names(test_ppmc), "item_fit")
expect_equal(names(test_ppmc$item_fit), c("conditional_prob", "pvalue"))
expect_identical(test_model$fit$ppmc$item_fit$conditional_prob,
test_ppmc$item_fit$conditional_prob)
expect_equal(names(test_ppmc$item_fit$pvalue),
c("item", "obs_pvalue", "ppmc_mean", "2.5%", "97.5%", "ppp"))

# overwrite just conditional prob with samples and new probs
# add overall p-values
test_model <- add_fit(test_model, method = "ppmc", overwrite = TRUE,
model_fit = NULL, item_fit = "conditional_prob",
model_fit = NULL,
item_fit = c("conditional_prob", "pvalue"),
return_draws = 0.2, probs = c(.1, .9))
expect_equal(names(test_model$fit), c("m2", "ppmc"))
expect_equal(names(test_model$fit$ppmc), c("item_fit", "model_fit"))
expect_equal(names(test_model$fit$ppmc$model_fit), "raw_score")
expect_equal(names(test_model$fit$ppmc$model_fit$raw_score),
c("obs_chisq", "ppmc_mean", "5.5%", "94.5%", "ppp"))
expect_equal(names(test_model$fit$ppmc$item_fit),
c("odds_ratio", "conditional_prob"))
c("odds_ratio", "conditional_prob", "pvalue"))
expect_equal(names(test_model$fit$ppmc$item_fit$odds_ratio),
c("item_1", "item_2", "obs_or", "ppmc_mean", "2.5%", "97.5%",
"samples", "ppp"))
expect_equal(names(test_model$fit$ppmc$item_fit$conditional_prob),
c("item", "class", "obs_cond_pval", "ppmc_mean", "10%", "90%",
"samples", "ppp"))
expect_equal(names(test_model$fit$ppmc$item_fit$pvalue),
c("item", "obs_pvalue", "ppmc_mean", "10%", "90%",
"samples", "ppp"))

# test extraction
rs_check <- measr_extract(test_model, "ppmc_raw_score")
@@ -343,6 +374,16 @@
expect_equal(measr_extract(test_model, "ppmc_odds_ratio_flags",
ppmc_interval = 0.8),
dplyr::filter(or_check, ppp <= 0.1 | ppp >= 0.9))

pval_check <- measr_extract(test_model, "ppmc_pvalue")
expect_equal(pval_check,
test_model$fit$ppmc$item_fit$pvalue)
expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
ppmc_interval = 0.95),
dplyr::filter(pval_check, ppp <= 0.025 | ppp >= 0.975))
expect_equal(measr_extract(test_model, "ppmc_pvalue_flags",
ppmc_interval = 0.6),
dplyr::filter(pval_check, ppp <= 0.2 | ppp >= 0.8))
})

test_that("respondent probabilities are correct", {