From cff92ecfbde2b709603fd60cbbf5f140d742719a Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Thu, 10 Oct 2024 14:11:41 -0400 Subject: [PATCH 1/3] minor speedups for GEE and GLM modes --- NAMESPACE | 1 + R/backward_sel_WIC.R | 8 +++----- R/marge2.R | 2 +- R/score_fun_gee.R | 20 ++++++++++---------- R/score_fun_glm.R | 16 ++++++++-------- R/stat_out_score_gee_null.R | 32 ++++++++++++++++++-------------- 6 files changed, 41 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 866bca0..3f4de44 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ importFrom(Matrix,rowMeans) importFrom(Matrix,solve) importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) +importFrom(RcppEigen,fastLmPure) importFrom(bigstatsr,as_FBM) importFrom(broom.mixed,tidy) importFrom(doSNOW,registerDoSNOW) diff --git a/R/backward_sel_WIC.R b/R/backward_sel_WIC.R index e0ce0f2..d6bbb2a 100644 --- a/R/backward_sel_WIC.R +++ b/R/backward_sel_WIC.R @@ -7,7 +7,7 @@ #' @importFrom gamlss gamlss #' @importFrom geeM geem #' @importFrom MASS negative.binomial -#' @importFrom withr with_output_sink +#' @importFrom stats vcov #' @param Y The response variable. Defaults to NULL. #' @param B_new The model matrix. Defaults to NULL. #' @param is.gee Is the model a GEE? Defaults to FALSE. @@ -43,10 +43,8 @@ backward_sel_WIC <- function(Y = NULL, fit <- gamlss::gamlss(Y ~ B_new - 1, family = "NBI", trace = FALSE) - withr::with_output_sink(tempfile(), { - fit_sum_mat <- as.matrix(summary(fit)) - }) - wald_stat <- fit_sum_mat[, 3][-c(1, nrow(fit_sum_mat))]^2 + vcov_mat <- stats::vcov(fit, type = "all") + wald_stat <- unname((vcov_mat$coef / vcov_mat$se)[-c(1, length(vcov_mat$coef))]^2) } return(wald_stat) } diff --git a/R/marge2.R b/R/marge2.R index 0b5b36a..4efa37f 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -610,7 +610,7 @@ marge2 <- function(X_pred = NULL, if (trunc.type == 2) { B2a <- matrix(rep(B2[, best.var], 2), ncol = 2) - B_temp <- cbind(B, B2a*cbind(b1_new, b2_new)) + B_temp <- cbind(B, B2a * cbind(b1_new, b2_new)) B_new <- B2a * cbind(b1_new, b2_new) var_name3 <- var_name2[best.var] colnames(B_new) <- rep(var_name3, 2) diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R index a6e1712..2d4a49c 100644 --- a/R/score_fun_gee.R +++ b/R/score_fun_gee.R @@ -4,8 +4,8 @@ #' @author Jakub Stoklosa #' @author David I. Warton #' @author Jack Leary -#' @importFrom stats lm.fit -#' @importFrom Matrix diag t solve +#' @importFrom RcppEigen fastLmPure +#' @importFrom Matrix solve #' @importFrom MASS ginv #' @description Calculate the score statistic for a GEE model. #' @param Y The response variable. Defaults to NULL. @@ -42,7 +42,7 @@ score_fun_gee <- function(Y = NULL, # check inputs if (is.null(Y) || is.null(N) || is.null(n_vec) || is.null(VS.est_list) || is.null(AWA.est_list) || is.null(J2_list) || is.null(Sigma2_list) || is.null(J11.inv) || is.null(JSigma11) || is.null(mu.est) || is.null(V.est) || is.null(B1) || is.null(XA)) { stop("Some inputs to score_fun_gee() are missing.") } # generate score statistic - reg <- try({ stats::lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model. + reg <- try({ RcppEigen::fastLmPure(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model. if (inherits(reg, "try-error") || any(is.na(reg$coefficients))) { score <- NA_real_ } else { @@ -58,15 +58,15 @@ score_fun_gee <- function(Y = NULL, AWA.est_i <- AWA.est_list[[i]] J2_i <- J2_list[[i]] Sigma2_i <- Sigma2_list[[i]] - D.est_i <- eigenMapMatMult(A = Matrix::diag((mu.est[(sum(n_vec1[seq(i)]) + 1):k]), nrow = n_vec[i], ncol = n_vec[i]), + D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]), B = XA[(sum(n_vec1[seq(i)]) + 1):k, ], n_cores = 1) - D_est_i_transpose <- Matrix::t(D.est_i) + D_est_i_transpose <- t(D.est_i) J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, - B = Matrix::t(J2_i), + B = t(J2_i), n_cores = 1) Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, - B = Matrix::t(Sigma2_i), + B = t(Sigma2_i), n_cores = 1) B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, B = VS.est_i, @@ -82,12 +82,12 @@ score_fun_gee <- function(Y = NULL, B = J11.inv, n_cores = 1) temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, - B = Matrix::t(Sigma21), + B = t(Sigma21), n_cores = 1) temp_prod_2 <- eigenMapMatMult(A = Sigma21, B = J11.inv, n_cores = 1) - J21_transpose <- Matrix::t(J21) + J21_transpose <- t(J21) temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, B = J21_transpose, n_cores = 1) @@ -102,7 +102,7 @@ score_fun_gee <- function(Y = NULL, if (inherits(Sigma_inv, "try-error")) { Sigma_inv <- MASS::ginv(Sigma) } - temp_prod <- eigenMapMatMult(A = Matrix::t(B.est), + temp_prod <- eigenMapMatMult(A = t(B.est), B = Sigma_inv, n_cores = 1) score <- eigenMapMatMult(A = temp_prod, diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R index 6521b2b..f8b98f7 100644 --- a/R/score_fun_glm.R +++ b/R/score_fun_glm.R @@ -4,8 +4,8 @@ #' @author Jakub Stoklosa #' @author David I. Warton #' @author Jack Leary -#' @importFrom stats lm.fit -#' @importFrom Matrix t +#' @importFrom RcppEigen fastLmPure +#' @importFrom Matrix solve #' @importFrom MASS ginv #' @description Calculate the score statistic for a GLM model. #' @param Y The response variable. Defaults to NULL. @@ -31,7 +31,7 @@ score_fun_glm <- function(Y = NULL, # check inputs if (is.null(Y) || is.null(VS.est_list) || is.null(A_list) || is.null(B1_list) || is.null(mu.est) || is.null(V.est) || is.null(B1) || is.null(XA)) { stop("Some inputs to score_fun_glm() are missing.") } # generate score statistic - reg <- try({ stats::lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model. + reg <- try({ RcppEigen::fastLmPure(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model. if (inherits(reg, "try-error") || any(is.na(reg$coefficients))) { score <- NA_real_ } else { @@ -41,20 +41,20 @@ score_fun_glm <- function(Y = NULL, B_list_i <- eigenMapMatMult(A = B1_list_i, B = XA, n_cores = 1) - D_list_i <- eigenMapMatMult(A = Matrix::t(XA), + D_list_i <- eigenMapMatMult(A = t(XA), B = (XA * c(mu.est^2 / V.est)), n_cores = 1) - temp_prod <- eigenMapMatMult(A = Matrix::t(B_list_i), + temp_prod <- eigenMapMatMult(A = t(B_list_i), B = A_list_i, n_cores = 1) temp_prod <- eigenMapMatMult(A = temp_prod, B = B_list_i, n_cores = 1) inv.XVX_22 <- D_list_i - temp_prod - B.est <- eigenMapMatMult(A = Matrix::t(mu.est * VS.est_i), + B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), B = XA, n_cores = 1) - XVX_22 <- try({ solve(inv.XVX_22) }, silent = TRUE) + XVX_22 <- try({ Matrix::solve(inv.XVX_22) }, silent = TRUE) if (inherits(XVX_22, "try-error")) { XVX_22 <- MASS::ginv(inv.XVX_22) } @@ -62,7 +62,7 @@ score_fun_glm <- function(Y = NULL, B = XVX_22, n_cores = 1) score <- eigenMapMatMult(A = temp_prod, - B = Matrix::t(B.est), + B = t(B.est), n_cores = 1) } res <- list(score = score) diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R index 084eb7a..42f4182 100644 --- a/R/stat_out_score_gee_null.R +++ b/R/stat_out_score_gee_null.R @@ -7,7 +7,7 @@ #' @importFrom geeM geem #' @importFrom MASS negative.binomial ginv #' @importFrom stats fitted.values -#' @importFrom Matrix diag t solve +#' @importFrom Matrix solve #' @description A function that calculates parts of the score statistic for GEEs only (it is used for the full path for forward selection). #' @param Y The response variable Defaults to NULL. #' @param B_null The design matrix matrix under the null model Defaults to NULL. @@ -38,7 +38,8 @@ stat_out_score_gee_null <- function(Y = NULL, corstr = cor.structure, family = MASS::negative.binomial(theta.hat), scale.fix = FALSE, - sandwich = FALSE) + sandwich = FALSE, + maxit = 10) alpha_est <- ests$alpha sigma_est <- ests$phi mu_est <- as.matrix(stats::fitted.values(ests)) @@ -51,10 +52,10 @@ stat_out_score_gee_null <- function(Y = NULL, k <- sum(n_vec[seq(i)]) # set up working correlation matrix structure if (cor.structure == "independence") { - R_alpha <- Matrix::diag(1, n_vec[i], n_vec[i]) + R_alpha <- diag(1, n_vec[i], n_vec[i]) } else if (cor.structure == "exchangeable") { R_alpha <- matrix(alpha_est, nrow = n_vec[i], ncol = n_vec[i]) - R_alpha <- R_alpha + Matrix::diag(c(1 - alpha_est), nrow = n_vec[i], ncol = n_vec[i]) + R_alpha <- R_alpha + diag(1 - alpha_est, nrow = n_vec[i], ncol = n_vec[i]) } else if (cor.structure == "ar1") { R_alpha <- alpha_est^outer(seq(n_vec[i]), seq(n_vec[i]), \(x, y) abs(x - y)) } else { @@ -63,19 +64,22 @@ stat_out_score_gee_null <- function(Y = NULL, # compute iteration values for each statistic temp_seq_n <- (sum(n_vec1[seq(i)]) + 1):k mu_est_sub <- mu_est[temp_seq_n] - diag_sqrt_V_est <- Matrix::diag(sqrt(V_est[temp_seq_n]), - nrow = n_vec[i], - ncol = n_vec[i]) + diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]), + nrow = n_vec[i], + ncol = n_vec[i]) temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, B = R_alpha, n_cores = 1) V_est_i <- eigenMapMatMult(A = temp_prod, B = diag_sqrt_V_est, n_cores = 1) - V_est_i_inv <- eigenMapMatrixInvert(A = V_est_i, n_cores = 1) - S_est_i <- Matrix::t(Y)[temp_seq_n] - mu_est_sub + V_est_i_inv <- try({ Matrix::solve(V_est_i) }, silent = TRUE) + if (inherits(V_est_i_inv, "try-error")) { + V_est_i_inv <- MASS::ginv(V_est_i) + } + S_est_i <- t(Y)[temp_seq_n] - mu_est_sub temp_prod <- eigenMapMatMult(A = S_est_i, - B = Matrix::t(S_est_i), + B = t(S_est_i), n_cores = 1) temp_prod <- eigenMapMatMult(A = V_est_i_inv, B = temp_prod, @@ -83,12 +87,12 @@ stat_out_score_gee_null <- function(Y = NULL, AWA_est_i <- eigenMapMatMult(A = temp_prod, B = V_est_i_inv, n_cores = 1) - D_est_i <- eigenMapMatMult(A = Matrix::diag(mu_est_sub, - nrow = n_vec[i], - ncol = n_vec[i]), + D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, + nrow = n_vec[i], + ncol = n_vec[i]), B = B_null[temp_seq_n, ], n_cores = 1) - D_est_i_transpose <- Matrix::t(D_est_i) + D_est_i_transpose <- t(D_est_i) temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv, n_cores = 1) From bf73e750f744f17db845a186bd7cb010abfe60af Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Thu, 10 Oct 2024 14:43:22 -0400 Subject: [PATCH 2/3] modified test suite -- closes #242 --- tests/testthat/test_scLANE.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 553d3f0..5478051 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -309,12 +309,12 @@ test_that("testDynamic() output", { expect_length(glm_gene_stats, 20) expect_length(gee_gene_stats, 20) expect_length(glmm_gene_stats, 20) - expect_s3_class(glm_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame") - expect_s3_class(gee_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame") - expect_s3_class(glmm_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame") - expect_gt(nrow(glm_gene_stats$ABCF1$Lineage_A$MARGE_Summary), 0) - expect_gt(nrow(gee_gene_stats$ABCF1$Lineage_A$MARGE_Summary), 0) - expect_gt(nrow(glmm_gene_stats$ABCF1$Lineage_A$MARGE_Summary), 0) + expect_s3_class(glm_gene_stats[[glm_test_results$Gene[1]]]$Lineage_A$MARGE_Summary, "data.frame") + expect_s3_class(gee_gene_stats[[gee_test_results$Gene[1]]]$Lineage_A$MARGE_Summary, "data.frame") + expect_s3_class(glmm_gene_stats[[glmm_test_results$Gene[1]]]$Lineage_A$MARGE_Summary, "data.frame") + expect_gt(nrow(glm_gene_stats[[glm_test_results$Gene[1]]]$Lineage_A$MARGE_Summary), 0) + expect_gt(nrow(gee_gene_stats[[gee_test_results$Gene[1]]]$Lineage_A$MARGE_Summary), 0) + expect_gt(nrow(glmm_gene_stats[[glmm_test_results$Gene[1]]]$Lineage_A$MARGE_Summary), 0) expect_gt(sum(purrr::map_lgl(glm_gene_stats, \(x) x$Lineage_A$Model_Status == "MARGE model OK, null model OK")), 0) expect_gt(sum(purrr::map_lgl(gee_gene_stats, \(x) x$Lineage_A$Model_Status == "MARGE model OK, null model OK")), 0) expect_gt(sum(purrr::map_lgl(glmm_gene_stats, \(x) x$Lineage_A$Model_Status == "MARGE model OK, null model OK")), 0) From bc239c07205e02246258373a6cec6494308ec4d3 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Thu, 10 Oct 2024 15:12:40 -0400 Subject: [PATCH 3/3] added error handling and optional QR decomp for wald stat in backward_sel_WIC() --- R/backward_sel_WIC.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/backward_sel_WIC.R b/R/backward_sel_WIC.R index d6bbb2a..fdbe206 100644 --- a/R/backward_sel_WIC.R +++ b/R/backward_sel_WIC.R @@ -43,8 +43,13 @@ backward_sel_WIC <- function(Y = NULL, fit <- gamlss::gamlss(Y ~ B_new - 1, family = "NBI", trace = FALSE) - vcov_mat <- stats::vcov(fit, type = "all") - wald_stat <- unname((vcov_mat$coef / vcov_mat$se)[-c(1, length(vcov_mat$coef))]^2) + vcov_mat <- try({ stats::vcov(fit, type = "all") }, silent = TRUE) + if (inherits(vcov_mat, "try-error")) { + covmat_unscaled <- chol2inv(fit$mu.qr$qr[1:(fit$mu.df - fit$mu.nl.df), 1:(fit$mu.df - fit$mu.nl.df), drop = FALSE]) + wald_stat <- unname(coef(fit) / sqrt(diag(covmat_unscaled))^2)[-1] + } else { + wald_stat <- unname((vcov_mat$coef / vcov_mat$se)[-c(1, length(vcov_mat$coef))]^2) + } } return(wald_stat) }