Skip to content

Commit 09e6daf

Browse files
authored
Merge pull request #184 from jr-leary7/dev
Dev
2 parents eda0149 + 35e9611 commit 09e6daf

File tree

9 files changed

+150
-10
lines changed

9 files changed

+150
-10
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export(extractBreakpoints)
88
export(fitGLMM)
99
export(geneProgramDrivers)
1010
export(geneProgramScoring)
11+
export(geneProgramSignificance)
1112
export(getFittedValues)
1213
export(getKnotDist)
1314
export(getResultsDE)
@@ -71,6 +72,7 @@ importFrom(furrr,future_map)
7172
importFrom(future,multisession)
7273
importFrom(future,plan)
7374
importFrom(future,sequential)
75+
importFrom(gamlss,LR.test)
7476
importFrom(gamlss,gamlss)
7577
importFrom(gamlss,gamlss.control)
7678
importFrom(gamlss,pb)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
+ Added `geneProgramDrivers()` function to compute & test correlations of expression with gene module scores.
44
+ Updated documentation & unit tests.
5+
+ Added `geneProgramSignificance` function to estimate associations between gene program module scores and pseudotime.
56

67
# Changes in version 0.7.8
78

R/geneProgramDrivers.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#' @importFrom doSNOW registerDoSNOW
1212
#' @importFrom stats cor.test p.adjust
1313
#' @importFrom dplyr arrange desc mutate filter
14-
#' @description This function computes the correlation
14+
#' @description This function computes the correlation between smoothed gene expression and gene program scores in order to identify genes are significantly associated with program scores i.e., the "drivers" of the gene program.
1515
#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL.
1616
#' @param genes A character vector of genes to test. Defaults to NULL.
1717
#' @param gene.program A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.
@@ -39,7 +39,8 @@
3939
#' program_drivers <- geneProgramDrivers(sim_counts,
4040
#' genes = gene_embed$gene,
4141
#' gene.program = sim_counts$cluster_0,
42-
#' fdr.cutoff = 0.05)
42+
#' fdr.cutoff = 0.05,
43+
#' n.cores = 1L)
4344

4445
geneProgramDrivers <- function(expr.mat = NULL,
4546
genes = NULL,

R/geneProgramScoring.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
#' @param genes A character vector of gene IDs. Defaults to NULL.
99
#' @param gene.clusters A factor containing the cluster assignment of each gene in \code{genes}. Defaults to NULL.
1010
#' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL.
11-
#' @param minmax.norm (Optional) Should each program's score be min-max normalized to be on [0, 1]? Defaults to FALSE.
11+
#' @param minmax.norm (Optional) Should each program's score be min-max normalized to be on (0, 1)? Defaults to TRUE.
12+
#' @param minmax.epsilon (Optional) The tolerance used to ensure that program scores equal to 0 or 1 do not occur. Defaults to 0.01.
1213
#' @param n.cores (Optional) The number of cores used under the hood in \code{\link[UCell]{ScoreSignatures_UCell}}. Defaults to 2.
1314
#' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix.
1415
#' @seealso \code{\link[UCell]{ScoreSignatures_UCell}}
@@ -31,7 +32,8 @@ geneProgramScoring <- function(expr.mat = NULL,
3132
genes = NULL,
3233
gene.clusters = NULL,
3334
program.labels = NULL,
34-
minmax.norm = FALSE,
35+
minmax.norm = TRUE,
36+
minmax.epsilon = 1e-2,
3537
n.cores = 2L) {
3638
# check inputs
3739
if (is.null(expr.mat) || is.null(genes) || is.null(gene.clusters)) { stop("Arguments to geneProgramScoring() are missing.") }
@@ -65,11 +67,11 @@ geneProgramScoring <- function(expr.mat = NULL,
6567
program_scores <- UCell::ScoreSignatures_UCell(counts_matrix,
6668
features = program_list,
6769
ncores = n.cores)
68-
# min-max normalize if desired
70+
# min-max normalize with tol to (0, 1) if desired
6971
if (minmax.norm) {
7072
program_scores <- purrr::map(seq(ncol(program_scores)), \(i) {
7173
scores <- program_scores[, i]
72-
normed_scores <- (scores - min(scores)) / (max(scores) - min(scores))
74+
normed_scores <- minmax.epsilon + (((scores - min(scores)) * (1 - 2 * minmax.epsilon)) / (max(scores) - min(scores)))
7375
return(normed_scores)
7476
})
7577
program_scores <- purrr::reduce(program_scores, cbind)

R/geneProgramSignificance.R

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#' Test significance of gene program enrichment across a trajectory.
2+
#'
3+
#' @name geneProgramSignificance
4+
#' @author Jack Leary
5+
#' @import magrittr
6+
#' @importFrom purrr map reduce
7+
#' @importFrom gamlss gamlss LR.test
8+
#' @importFrom dplyr arrange desc mutate
9+
#' @importFrom stats p.adjust
10+
#' @description This function fits a Beta GAM with pseudotime as a covariate and gene program score as the response. The fitted model is then compared to a null, intercept-only model in order to determine the significance of the relationship between gene program and pseudotime.
11+
#' @param gene.programs A list of vectors of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.
12+
#' @param pt A vector of pseudotime values for each cell. May contain NAs, which are handled internally. Defaults to NULL.
13+
#' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL.
14+
#' @param p.adj.method (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "holm".
15+
#' @return A table of statistical output showing the significance of the association between pseudotime and program scores.
16+
#' @seealso \code{\link{geneProgramScoring}}
17+
#' @export
18+
#' @examples
19+
#' data(sim_counts)
20+
#' data(scLANE_models)
21+
#' data(sim_pseudotime)
22+
#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models,
23+
#' pt = sim_pseudotime,
24+
#' n.cores = 1L)
25+
#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L)
26+
#' sim_counts <- geneProgramScoring(sim_counts,
27+
#' genes = gene_embed$gene,
28+
#' gene.clusters = gene_embed$leiden,
29+
#' n.cores = 1L)
30+
#' program_enrichment_stats <- geneProgramSignificance(list(sim_counts$cluster_0),
31+
#' pt = sim_pseudotime$PT,
32+
#' program.labels = c("Program Name"))
33+
34+
geneProgramSignificance <- function(gene.programs = NULL,
35+
pt = NULL,
36+
program.labels = NULL,
37+
p.adj.method = "holm") {
38+
# check inputs
39+
if (is.null(gene.programs) || is.null(pt)) { stop("Inputs to geneProgramSignificance() are missing.") }
40+
if (is.null(program.labels)) {
41+
program.labels <- paste0("cluster_", seq(length(gene.programs)) - 1)
42+
}
43+
if (length(gene.programs) != length(program.labels)) { stop("program.labels must have the same length as gene.programs.") }
44+
# iterate over gene programs and fit Beta GAMs
45+
program_models <- purrr::map(seq(gene.programs), \(g) {
46+
pseudotime_vec <- pt[!is.na(pt)]
47+
program_score_vec <- gene.programs[[g]][!is.na(pt)]
48+
alt_model <- gamlss::gamlss(program_score_vec ~ pseudotime_vec,
49+
family = "BE",
50+
data = NULL,
51+
control = gamlss::gamlss.control(trace = FALSE))
52+
null_model <- gamlss::gamlss(program_score_vec ~ 1,
53+
family = "BE",
54+
data = NULL,
55+
control = gamlss::gamlss.control(trace = FALSE))
56+
lr_test_res <- gamlss::LR.test(null = null_model,
57+
alternative = alt_model,
58+
print = FALSE)
59+
res <- data.frame(Program = program.labels[g],
60+
LRT_Stat = lr_test_res$chi,
61+
DF = lr_test_res$df,
62+
P_Value = lr_test_res$p.val)
63+
return(res)
64+
})
65+
# prepare test results
66+
program_sig_df <- purrr::reduce(program_models, rbind) %>%
67+
dplyr::arrange(P_Value, dplyr::desc(LRT_Stat)) %>%
68+
dplyr::mutate(P_Value = dplyr::if_else(P_Value == 0, 2e-16, P_Value),
69+
P_Value_Adj = stats::p.adjust(P_Value, method = p.adj.method))
70+
return(program_sig_df)
71+
}

man/geneProgramDrivers.Rd

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/geneProgramScoring.Rd

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/geneProgramSignificance.Rd

Lines changed: 50 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test_scLANE.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,10 @@ withr::with_output_sink(tempfile(), {
227227
sim_data_seu <- geneProgramScoring(sim_data_seu,
228228
genes = gene_embedding$gene,
229229
gene.clusters = gene_embedding$leiden)
230+
# gene program significance
231+
program_significance <- geneProgramSignificance(list(sim_data$cluster_0),
232+
pt = pt_test$PT,
233+
program.labels = c("Cluster0"))
230234
# gene program drivers
231235
program_drivers <- geneProgramDrivers(sim_data,
232236
genes = gene_embedding$gene,
@@ -407,6 +411,11 @@ test_that("geneProgramScoring() output", {
407411
expect_equal(colnames(sim_data_seu@meta.data)[11], "cluster_1")
408412
})
409413

414+
test_that("geneProgramSignificance() output", {
415+
expect_s3_class(program_significance, "data.frame")
416+
expect_equal(ncol(program_significance), 5)
417+
})
418+
410419
test_that("geneProgramDrivers() output", {
411420
expect_s3_class(program_drivers, "data.frame")
412421
expect_s3_class(program_drivers_seu, "data.frame")

0 commit comments

Comments
 (0)