Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed score test function to provide more accurate values #269

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 41 additions & 23 deletions R/scoreTestGEE.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @author Jack R. Leary
#' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
#' @importFrom stats model.matrix predict pchisq
#' @importFrom Matrix bdiag
#' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL.
Expand All @@ -13,10 +14,11 @@
#' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
#' @details
#' \itemize{
#' \item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
#' \item Calculating the test statistic involves taking the inverse of the variance of the score vector. Ideally this would be done using the true inverse, but in practice this can cause issues when the matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse if the original matrix inversion fails.
#' \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
#' }
#' @seealso \code{\link[geeM]{geem}}
#' @seealso \code{\link[glmtoolbox]{anova2}}
#' @seealso \code{\link{waldTestGEE}}
#' @seealso \code{\link{modelLRT}}

Expand Down Expand Up @@ -46,19 +48,19 @@ scoreTestGEE <- function(mod.1 = NULL,
Notes = NA_character_)
} else {
rho <- summary(mod.0)$alpha
sigma2 <- summary(mod.0)$phi
phi <- summary(mod.0)$phi
theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
X_null <- stats::model.matrix(mod.0, data = null.df)
X_alt <- stats::model.matrix(mod.1, data = alt.df)
residuals_null <- null.df$Y - exp(stats::predict(mod.0))
r_null <- null.df$Y - stats::predict(mod.0)
p_alt <- ncol(X_alt)
U <- rep(0, p_alt)
V_U <- matrix(0, nrow = p_alt, ncol = p_alt)
groups <- unique(id.vec)
i <- 1
W <- K <- vector("list", length = length(groups))
for (i in seq(groups)) {
group_indices <- which(id.vec == groups[i])
X_alt_i <- X_alt[group_indices, , drop = FALSE]
residuals_i <- residuals_null[group_indices]
n_i <- length(group_indices)
group_idx <- which(id.vec == groups[i])
n_i <- length(group_idx)
# create working correlation matrix R_i
if (cor.structure == "independence") {
R_i <- diag(1, nrow = n_i, ncol = n_i)
} else if (cor.structure == "exchangeable") {
Expand All @@ -67,26 +69,42 @@ scoreTestGEE <- function(mod.1 = NULL,
} else if (cor.structure == "ar1") {
R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
}
V_i <- sigma2 * R_i
R_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
if (inherits(R_i_inv, "try-error")) {
R_i_inv <- eigenMapPseudoInverse(V_i)
# create working covariance matrix V_i
mu_i <- stats::predict(mod.0)[group_idx]
V_mu_i <- mu_i * (1 + mu_i / theta)
A_i <- diag(V_mu_i)
A_i_sqrt <- sqrt(A_i) # same as taking the 1/2 power of A_i since A_i is diagonal
V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt
# create matrices W_i and K_i
K_i <- diag(mu_i)
V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
if (inherits(V_i_inv, "try-error")) {
V_i_inv <- eigenMapPseudoInverse(V_i)
}
X_alt_i_t <- t(X_alt_i)
U_i <- X_alt_i_t %*% (R_i_inv %*% residuals_i)
V_U_i <- X_alt_i_t %*% (R_i_inv %*% X_alt_i)
U <- U + U_i
V_U <- V_U + V_U_i
W_i <- K_i %*% V_i_inv %*% K_i
W[[i]] <- W_i
K[[i]] <- K_i
}
W <- as.matrix(Matrix::bdiag(W))
K <- as.matrix(Matrix::bdiag(K))
K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE)
if (inherits(K_inv, "try-error")) {
K_inv <- eigenMapPseudoInverse(K)
}
# generate score vector
U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
# generate variance of score vector and invert it
V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
if (inherits(V_U_inv, "try-error")) {
V_U_inv <- eigenMapPseudoInverse(V_U)
}
test_stat <- as.numeric(t(U) %*% V_U_inv %*% U)
df1 <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(test_stat, df = df1)
res <- list(Score_Stat = test_stat,
DF = df1,
# estimate test statistic and accompanying p-value
S <- t(U) %*% V_U_inv %*% U
S_df <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(S, df = S_df)
res <- list(Score_Stat = S,
DF = S_df,
P_Val = p_value,
Notes = NA_character_)
}
Expand Down
4 changes: 3 additions & 1 deletion man/scoreTestGEE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading