diff --git a/.Rbuildignore b/.Rbuildignore index 9d81df18..928274cc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,8 +1,24 @@ +^LICENSE$ +^\.lintr$ +^\.vscode ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ +^travis_setup\.sh$ ^_config\.yml$ -^vignettes/walkthrough_pbmc\.Rmd$ -^docs$ +^appveyor\.yml$ +^vignettes/.*html$ +^vignettes/articles +^vignettes/Integrating_multi_scRNA_data\.rmd$ ^vignettes/Integrating_scRNA_and_scATAC_data\.Rmd$ -^vignettes/Integrating_multi_scRNA_data\.Rmd$ \ No newline at end of file +^vignettes/Parameter_selection\.Rmd$ +^vignettes/SNAREseq_walkthrough\.Rmd$ +^vignettes/STARmap_dropviz_vig\.Rmd$ +^vignettes/UINMF_vignette\.Rmd$ +^vignettes/online_iNMF_tutorial\.Rmd$ +^vignettes/pbmc_alignment\.zip$ +^vignettes/walkthrough_pbmc\.Rmd$ +^vignettes/walkthrough_pbmc\.pdf$ +^vignettes/cross_species_vig\.Rmd$ +^docs +^devdata diff --git a/DESCRIPTION b/DESCRIPTION index 488547e3..7dc831eb 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,33 +1,35 @@ Package: rliger -Version: 1.0.0 -Date: 2021-03-09 +Version: 1.0.1 +Date: 2023-11-08 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. Authors@R: c( person(given = 'Joshua', family = 'Welch', email = 'welchjd@umich.edu', role = c('aut', 'ctb')), - person(given = 'Chao', family = 'Gao', email = 'gchao@umich.edu', role = c('aut', 'ctb', 'cre')), + person(given = 'Chao', family = 'Gao', email = 'gchao@umich.edu', role = c('aut', 'ctb')), person(given = 'Jialin', family = 'Liu', email = 'alanliu@umich.edu', role = c('aut', 'ctb')), person(given = 'Joshua', family = 'Sodicoff', email = 'sodicoff@umich.edu', role = c('aut', 'ctb')), person(given = 'Velina', family = 'Kozareva', role = c('aut', 'ctb')), person(given = 'Evan', family = 'Macosko', role = c('aut', 'ctb')), + person(given = 'Yichen', family = 'Wang', email = 'wayichen@umich.edu', role = c('aut', 'ctb', 'cre')), person(given = 'Paul', family = 'Hoffman', role = 'ctb'), person(given = 'Ilya', family = 'Korsunsky', role = 'ctb'), person(given = 'Robert', family = 'Lee', role = 'ctb') ) Author: Joshua Welch [aut, ctb], - Chao Gao [aut, ctb, cre], + Chao Gao [aut, ctb], Jialin Liu [aut, ctb], Joshua Sodicoff [aut, ctb], Velina Kozareva [aut, ctb], Evan Macosko [aut, ctb], + Yichen Wang [aut, ctb, cre], Paul Hoffman [ctb], Ilya Korsunsky [ctb], Robert Lee [ctb] -Maintainer: Chao Gao +Maintainer: Yichen Wang BugReports: https://github.com/welch-lab/liger/issues URL: https://github.com/welch-lab/liger -License: GPL-3 | file LICENSE +License: GPL-3 Imports: Rcpp (>= 0.12.10), plyr, FNN, @@ -38,31 +40,32 @@ Imports: Rcpp (>= 0.12.10), ica, Rtsne, ggplot2, - riverplot, foreach, parallel, doParallel, mclust, - stats, psych, RANN, uwot, rlang, - utils, hdf5r, - scattermore (>= 0.7) + scattermore (>= 0.7), + patchwork, + cowplot biocViews: LazyData: true LinkingTo: Rcpp, RcppArmadillo, RcppEigen, RcppProgress Depends: - cowplot, + R (>= 3.4), Matrix, methods, - patchwork -RoxygenNote: 7.1.1 + stats, + utils +RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: Seurat, + SeuratObject, knitr, reticulate, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index 395b0972..f7dd6838 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,8 +144,6 @@ importFrom(methods,slot) importFrom(methods,slotNames) importFrom(plyr,mapvalues) importFrom(plyr,rbind.fill.matrix) -importFrom(riverplot,makeRiver) -importFrom(riverplot,riverplot) importFrom(rlang,.data) importFrom(scattermore,geom_scattermore) importFrom(stats,approxfun) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..4a9d67ee --- /dev/null +++ b/NEWS.md @@ -0,0 +1,9 @@ +## rliger 1.0.1 + +- Allow setting mito pattern in `getMitoProportion()` #271 +- Fix efficiency issue when taking the log of norm.data (e.g. `runWilcoxon`) +- Add runable examples to all exported functions when possible +- Fix typo in online_iNMF matrix initialization +- Adapt to Seurat5 +- Other minor fixes + diff --git a/R/RcppExports.R b/R/RcppExports.R index b08396de..a3d24b85 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,113 +1,113 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) { - .Call('_rliger_RunModularityClusteringCpp', PACKAGE = 'rliger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) -} - -scaleNotCenterFast <- function(x) { - .Call('_rliger_scaleNotCenterFast', PACKAGE = 'rliger', x) -} - -rowMeansFast <- function(x) { - .Call('_rliger_rowMeansFast', PACKAGE = 'rliger', x) -} - -rowVarsFast <- function(x, means) { - .Call('_rliger_rowVarsFast', PACKAGE = 'rliger', x, means) -} - -sumSquaredDeviations <- function(x, means) { - .Call('_rliger_sumSquaredDeviations', PACKAGE = 'rliger', x, means) -} - -cpp_sumGroups_dgc <- function(x, p, i, ncol, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dgc', PACKAGE = 'rliger', x, p, i, ncol, groups, ngroups) -} - -cpp_sumGroups_dgc_T <- function(x, p, i, ncol, nrow, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dgc_T', PACKAGE = 'rliger', x, p, i, ncol, nrow, groups, ngroups) -} - -cpp_sumGroups_dense <- function(X, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_sumGroups_dense_T <- function(X, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dense <- function(X, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dense_T <- function(X, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dgc <- function(p, i, ncol, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dgc', PACKAGE = 'rliger', p, i, ncol, groups, ngroups) -} - -cpp_in_place_rank_mean <- function(v_temp, idx_begin, idx_end) { - .Call('_rliger_cpp_in_place_rank_mean', PACKAGE = 'rliger', v_temp, idx_begin, idx_end) -} - -cpp_rank_matrix_dgc <- function(x, p, nrow, ncol) { - .Call('_rliger_cpp_rank_matrix_dgc', PACKAGE = 'rliger', x, p, nrow, ncol) -} - -cpp_rank_matrix_dense <- function(X) { - .Call('_rliger_cpp_rank_matrix_dense', PACKAGE = 'rliger', X) -} - -cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'rliger', p, i, ncol, nrow, groups, ngroups) -} - -#' Fast calculation of feature count matrix -#' -#' @param bedmat A feature count list generated by bedmap -#' @param barcodes A list of barcodes -#' -#' @return A feature count matrix with features as rows and barcodes as -#' columns -#' @export -#' @examples -#' \dontrun{ -#' gene.counts <- makeFeatureMatrix(genes.bc, barcodes) -#' promoter.counts <- makeFeatureMatrix(promoters.bc, barcodes) -#' samnple <- gene.counts + promoter.counts -#' } -makeFeatureMatrix <- function(bedmat, barcodes) { - .Call('_rliger_makeFeatureMatrix', PACKAGE = 'rliger', bedmat, barcodes) -} - -cluster_vote <- function(nn_ranked, clusts) { - .Call('_rliger_cluster_vote', PACKAGE = 'rliger', nn_ranked, clusts) -} - -scale_columns_fast <- function(mat, scale = TRUE, center = TRUE) { - .Call('_rliger_scale_columns_fast', PACKAGE = 'rliger', mat, scale, center) -} - -max_factor <- function(H, dims_use, center_cols = FALSE) { - .Call('_rliger_max_factor', PACKAGE = 'rliger', H, dims_use, center_cols) -} - -solveNNLS <- function(C, B) { - .Call('_rliger_solveNNLS', PACKAGE = 'rliger', C, B) -} - -ComputeSNN <- function(nn_ranked, prune) { - .Call('_rliger_ComputeSNN', PACKAGE = 'rliger', nn_ranked, prune) -} - -WriteEdgeFile <- function(snn, filename, display_progress) { - invisible(.Call('_rliger_WriteEdgeFile', PACKAGE = 'rliger', snn, filename, display_progress)) -} - -DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) { - .Call('_rliger_DirectSNNToFile', PACKAGE = 'rliger', nn_ranked, prune, display_progress, filename) -} - +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) { + .Call('_rliger_RunModularityClusteringCpp', PACKAGE = 'rliger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) +} + +scaleNotCenterFast <- function(x) { + .Call('_rliger_scaleNotCenterFast', PACKAGE = 'rliger', x) +} + +rowMeansFast <- function(x) { + .Call('_rliger_rowMeansFast', PACKAGE = 'rliger', x) +} + +rowVarsFast <- function(x, means) { + .Call('_rliger_rowVarsFast', PACKAGE = 'rliger', x, means) +} + +sumSquaredDeviations <- function(x, means) { + .Call('_rliger_sumSquaredDeviations', PACKAGE = 'rliger', x, means) +} + +cpp_sumGroups_dgc <- function(x, p, i, ncol, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dgc', PACKAGE = 'rliger', x, p, i, ncol, groups, ngroups) +} + +cpp_sumGroups_dgc_T <- function(x, p, i, ncol, nrow, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dgc_T', PACKAGE = 'rliger', x, p, i, ncol, nrow, groups, ngroups) +} + +cpp_sumGroups_dense <- function(X, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_sumGroups_dense_T <- function(X, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dense <- function(X, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dense_T <- function(X, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dgc <- function(p, i, ncol, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dgc', PACKAGE = 'rliger', p, i, ncol, groups, ngroups) +} + +cpp_in_place_rank_mean <- function(v_temp, idx_begin, idx_end) { + .Call('_rliger_cpp_in_place_rank_mean', PACKAGE = 'rliger', v_temp, idx_begin, idx_end) +} + +cpp_rank_matrix_dgc <- function(x, p, nrow, ncol) { + .Call('_rliger_cpp_rank_matrix_dgc', PACKAGE = 'rliger', x, p, nrow, ncol) +} + +cpp_rank_matrix_dense <- function(X) { + .Call('_rliger_cpp_rank_matrix_dense', PACKAGE = 'rliger', X) +} + +cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'rliger', p, i, ncol, nrow, groups, ngroups) +} + +#' Fast calculation of feature count matrix +#' +#' @param bedmat A feature count list generated by bedmap +#' @param barcodes A list of barcodes +#' +#' @return A feature count matrix with features as rows and barcodes as +#' columns +#' @export +#' @examples +#' \dontrun{ +#' gene.counts <- makeFeatureMatrix(genes.bc, barcodes) +#' promoter.counts <- makeFeatureMatrix(promoters.bc, barcodes) +#' samnple <- gene.counts + promoter.counts +#' } +makeFeatureMatrix <- function(bedmat, barcodes) { + .Call('_rliger_makeFeatureMatrix', PACKAGE = 'rliger', bedmat, barcodes) +} + +cluster_vote <- function(nn_ranked, clusts) { + .Call('_rliger_cluster_vote', PACKAGE = 'rliger', nn_ranked, clusts) +} + +scale_columns_fast <- function(mat, scale = TRUE, center = TRUE) { + .Call('_rliger_scale_columns_fast', PACKAGE = 'rliger', mat, scale, center) +} + +max_factor <- function(H, dims_use, center_cols = FALSE) { + .Call('_rliger_max_factor', PACKAGE = 'rliger', H, dims_use, center_cols) +} + +solveNNLS <- function(C, B) { + .Call('_rliger_solveNNLS', PACKAGE = 'rliger', C, B) +} + +ComputeSNN <- function(nn_ranked, prune) { + .Call('_rliger_ComputeSNN', PACKAGE = 'rliger', nn_ranked, prune) +} + +WriteEdgeFile <- function(snn, filename, display_progress) { + invisible(.Call('_rliger_WriteEdgeFile', PACKAGE = 'rliger', snn, filename, display_progress)) +} + +DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) { + .Call('_rliger_DirectSNNToFile', PACKAGE = 'rliger', nn_ranked, prune, display_progress, filename) +} + diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..1587ec46 --- /dev/null +++ b/R/data.R @@ -0,0 +1,10 @@ +#' dgCMatrix object of PBMC subsample data with Control and Stimulated datasets +#' @format \code{dgCMatrix} object of gene expression matrix from PBMC study, +#' named by "ctrl" and "stim". +#' @source https://www.nature.com/articles/nbt.4042 +#' @references Hyun Min Kang and et. al., Nature Biotechnology, 2018 +#' @rdname liger-demodata +"ctrl" + +#' @rdname liger-demodata +"stim" diff --git a/R/rliger.R b/R/rliger.R index 81570570..22eb0996 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -1,5 +1,9 @@ -#' @importFrom Matrix colSums rowSums t +#' @import Matrix #' @importFrom grDevices dev.off pdf +#' @import hdf5r +#' @importFrom methods new +#' @importFrom utils packageVersion +#' @importFrom Rcpp evalCpp NULL #' The LIGER Class @@ -15,7 +19,7 @@ NULL #' @slot norm.data List of normalized matrices (genes by cells) #' @slot scale.data List of scaled matrices (cells by genes) #' @slot sample.data List of sampled matrices (gene by cells) -#' #' @slot scale.unshared.data List of scaled matrices of unshared features +#' @slot scale.unshared.data List of scaled matrices of unshared features #' @slot h5file.info List of HDF5-related information for each input dataset. Paths to raw data, indices, #' indptr, barcodes, genes and the pipeline through which the HDF5 file is formated (10X, AnnData, etc), #' type of sampled data (raw, normalized or scaled). @@ -23,7 +27,7 @@ NULL #' cells across all datasets) #' @slot var.genes Subset of informative genes shared across datasets to be used in matrix #' factorization -#' #' @slot var.unshared.features Highly variable unshared features selected from each dataset +#' @slot var.unshared.features Highly variable unshared features selected from each dataset #' @slot H Cell loading factors (one matrix per dataset, dimensions cells by k) #' @slot H.norm Normalized cell loading factors (cells across all datasets combined into single #' matrix) @@ -45,9 +49,7 @@ NULL #' @rdname liger-class #' @aliases liger-class #' @exportClass liger -#' @importFrom Rcpp evalCpp #' @useDynLib rliger - liger <- methods::setClass( "liger", slots = c( @@ -84,7 +86,9 @@ liger <- methods::setClass( #' @aliases show,liger-method #' @docType methods #' @rdname show-methods - +#' @examples +#' ligerex <- createLiger(list(ctrl = ctrl)) +#' show(ligerex) setMethod( f = "show", signature = "liger", @@ -130,9 +134,8 @@ setMethod( #' @param verbose Print messages (TRUE by default) #' #' @return List of merged matrices across data types (returns sparse matrix if only one data type -#' detected), or nested list of matrices organized by sample if merge=F. +#' detected), or nested list of matrices organized by sample if merge = FALSE. #' -#' @import Matrix #' @importFrom utils read.delim read.table #' #' @export @@ -145,12 +148,11 @@ setMethod( #' dges1 <- read10X(list(sample.dir1, sample.dir2), c("sample1", "sample2"), min.umis = 50) #' ligerex <- createLiger(expr = dges1[["Gene Expression"]], custom = dges1[["CUSTOM"]]) #' } - read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, min.umis = 0, use.filtered = FALSE, reference = NULL, data.type = "rna", verbose = TRUE) { datalist <- list() datatypes <- c("Gene Expression") - + if (length(num.cells) == 1) { num.cells <- rep(num.cells, length(sample.dirs)) } @@ -193,20 +195,20 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m } matrix.file <- paste0(sample.dir, "/matrix.mtx", suffix) barcodes.file <- paste0(sample.dir, "/barcodes.tsv", suffix) - + rawdata <- readMM(matrix.file) # convert to dgc matrix if (class(rawdata)[1] == "dgTMatrix") { rawdata <- as(rawdata, "CsparseMatrix") } - + # filter for UMIs first to increase speed umi.pass <- which(colSums(rawdata) > min.umis) if (length(umi.pass) == 0) { message("No cells pass UMI cutoff. Please lower it.") } rawdata <- rawdata[, umi.pass, drop = FALSE] - + barcodes <- readLines(barcodes.file)[umi.pass] # Remove -1 tag from barcodes if (all(grepl(barcodes, pattern = "\\-1$"))) { @@ -224,7 +226,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m } # since some genes are only differentiated by ENSMBL colnames(rawdata) <- barcodes - + # split based on 10X datatype -- V3 has Gene Expression, Antibody Capture, CRISPR, CUSTOM # V2 has only Gene Expression by default and just two columns if (is.null(ncol(features))) { @@ -243,7 +245,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m }) names(samplelist) <- sam.datatypes.unique } - + # num.cells filter only for gene expression data if (!is.null(num.cells)) { if (names(samplelist) == "Gene Expression" | names(samplelist) == "Chromatin Accessibility") { @@ -260,7 +262,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m samplelist[[data_label]] <- samplelist[[data_label]][, order(cs, decreasing = TRUE) [1:num.cells[i]]] } - + # cs <- colSums(samplelist[["Gene Expression"]]) # limit <- ncol(samplelist[["Gene Expression"]]) # if (num.cells[i] > limit) { @@ -273,12 +275,12 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m # samplelist[["Gene Expression"]] <- samplelist[["Gene Expression"]][, order(cs, decreasing = TRUE) # [1:num.cells[i]]] } - + datalist[[i]] <- samplelist } if (merge) { if (verbose) { - message("Merging samples") + message("Merging samples") } return_dges <- lapply(datatypes, function(x) { mergelist <- lapply(datalist, function(d) { @@ -289,7 +291,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m MergeSparseDataAll(mergelist, sample.names) }) names(return_dges) <- datatypes - + # if only one type of data present if (length(return_dges) == 1) { if (verbose){ @@ -318,11 +320,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m #' @param indptr.name Path to the pointers stored in HDF5 file. #' @param genes.name Path to the gene names stored in HDF5 file. #' @param barcodes.name Path to the barcodes stored in HDF5 file. -#' #' @return Directly generates newly merged hdf5 file. -#' -#' @import hdf5r -#' #' @export #' @examples #' \dontrun{ @@ -332,7 +330,6 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m #' # name for output HDF5 file: "merged.h5" #' mergeH5(list("library1.h5","library2.h5"), c("lib1","lib2"), "merged.h5") #' } - mergeH5 <- function(file.list, library.names, new.filename, @@ -363,7 +360,7 @@ mergeH5 <- function(file.list, indptr = h5file[["raw.X/indptr"]][] barcodes = paste0(library.names[i], "_", h5file[["obs"]][]$cell) genes = h5file[["raw.var"]][]$index - + } else { data = h5file[[data.name]][] indices = h5file[[indices.name]][] @@ -371,7 +368,7 @@ mergeH5 <- function(file.list, barcodes = paste0(library.names[i], "_", h5file[[barcodes.name]][]) genes = h5file[[genes.name]][] } - + if (i != 1) indptr = indptr[2:length(indptr)] num_data = length(data) num_indptr = length(indptr) @@ -406,11 +403,7 @@ mergeH5 <- function(file.list, #' #' @param object \code{liger} object. #' @param file.path List of paths to hdf5 files. -#' #' @return \code{liger} object with restored links. -#' -#' @import hdf5r -#' #' @export #' @examples #' \dontrun{ @@ -423,7 +416,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { if (is.null(file.path) & is.null(object@h5file.info[[1]][["file.path"]])) { # file path is not provided by file.path param or liger object stop('File path information is not stored in the liger object. Please provide a list of file paths through file.path parameter.') } - + if (!is.null(file.path)) { # if new file path is provided, update liger object h5file.info for (i in 1:length(object@h5file.info)) { object@h5file.info[[i]][["file.path"]] = file.path[[i]] @@ -433,7 +426,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { object@raw.data = lapply(object@h5file.info, function(x) hdf5r::H5File$new(x[["file.path"]], mode="r+")) object@norm.data = lapply(object@raw.data, function(x) x[["norm.data"]]) object@scale.data = lapply(object@raw.data, function(x) x[["scale.data"]]) - + for (i in 1:length(object@raw.data)){ if (object@h5file.info[[i]][["format.type"]] == "10X"){ barcodes.name = "matrix/barcodes" @@ -443,7 +436,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { indices.name = "matrix/indices" indptr.name = "matrix/indptr" genes.name = "matrix/features/name" - } else if (format.type.list[i] == "AnnData"){ + } else if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ barcodes.name = "obs" barcodes = object@raw.data[[i]][[barcodes.name]][]$cell num_cells = length(object@raw.data[[i]][[barcodes.name]][]$cell) @@ -487,20 +480,10 @@ restoreOnlineLiger <- function(object, file.path = NULL) { #' @param genes.name Path to the gene names stored in HDF5 file. #' @param barcodes.name Path to the barcodes stored in HDF5 file. #' @param verbose Print messages (TRUE by default) -#' #' @return \code{liger} object with raw.data slot set. -#' -#' @import Matrix -#' @import hdf5r -#' #' @export #' @examples -#' # Demonstration using matrices with randomly generated numbers -#' Y <- matrix(runif(5000,0,2), 10,500) -#' Z <- matrix(runif(5000,0,2), 10,500) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) - - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) createLiger <- function(raw.data, take.gene.union = FALSE, remove.missing = TRUE, @@ -517,6 +500,7 @@ createLiger <- function(raw.data, object@V = rep(list(NULL), length(raw.data)) object@H = rep(list(NULL), length(raw.data)) cell.data = list() + format.type.list = format.type if (length(format.type) == 1) format.type.list = rep(format.type, length(raw.data)) for (i in 1:length(raw.data)){ file.h5 = hdf5r::H5File$new(raw.data[[i]], mode="r+") @@ -556,12 +540,12 @@ createLiger <- function(raw.data, object@norm.data[[i]] = file.h5[["norm.data"]] names(object@norm.data)[[i]] = names(object@raw.data)[[i]] } - + if (file.h5$exists("scale.data")){ object@scale.data[[i]] = file.h5[["scale.data"]] names(object@scale.data)[[i]] = names(object@raw.data)[[i]] } - + if (file.h5$exists("cell.data")){ cell.data[[i]] = data.frame(dataset = file.h5[["cell.data"]][]$dataset, nUMI = file.h5[["cell.data"]][]$nUMI, @@ -580,7 +564,7 @@ createLiger <- function(raw.data, names(object@H) <- names(object@V) <- names(object@h5file.info) <- names(object@raw.data) return(object) } - + raw.data <- lapply(raw.data, function(x) { if (class(x)[1] == "dgTMatrix" | class(x)[1] == 'dgCMatrix') { mat <- as(x, 'CsparseMatrix') @@ -594,7 +578,7 @@ createLiger <- function(raw.data, as(as.matrix(x), 'CsparseMatrix') } }) - + if (length(Reduce(intersect, lapply(raw.data, colnames))) > 0 & length(raw.data) > 1) { stop('At least one cell name is repeated across datasets; please make sure all cell names are unique.') @@ -633,7 +617,7 @@ createLiger <- function(raw.data, object <- removeMissingObs(object, use.cols = FALSE, verbose = verbose) } } - + # Initialize cell.data for object with nUMI, nGene, and dataset nUMI <- unlist(lapply(object@raw.data, function(x) { colSums(x) @@ -648,7 +632,7 @@ createLiger <- function(raw.data, rownames(object@cell.data) <- unlist(lapply(object@raw.data, function(x) { colnames(x) }), use.names = FALSE) - + return(object) } @@ -681,19 +665,11 @@ safe_h5_create = function(object, idx, dataset_name, dims, mode="double", chunk_ #' expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any #' dataset) (default TRUE). #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with norm.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' # Demonstration using matrices with randomly generated numbers -#' Y <- matrix(runif(5000,0,2), 10,500) -#' Z <- matrix(runif(5000,0,2), 10,500) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) - normalize <- function(object, chunk = 1000, format.type = "10X", @@ -713,21 +689,21 @@ normalize <- function(object, num_entries = object@h5file.info[[i]][["data"]]$dims num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = object@h5file.info[[i]][["genes"]]$dims - - + + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_sum_sq = rep(0,num_genes) gene_means = rep(0,num_genes) #file.h5$close_all() - + safe_h5_create(object = object, idx = i, dataset_name = "/norm.data", dims = num_entries, mode = h5types$double, chunk_size = chunk_size) safe_h5_create(object = object, idx = i, dataset_name = "/cell_sums", dims = num_cells, mode = h5types$int, chunk_size = chunk_size) - + #file.h5 = H5File$new(fname, mode="r+") num_chunks = ceiling(num_cells/chunk_size) - if (verbose) { + if (verbose) { pb = txtProgressBar(0,num_chunks,style = 3) } ind = 0 @@ -752,7 +728,7 @@ normalize <- function(object, prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + length(norm.data@x) prev_end_ind = tail(start_inds, 1) - + # calculate row sum and sum of squares using normalized data row_sums = Matrix::rowSums(norm.data) gene_sum_sq = gene_sum_sq + rowSums(norm.data*norm.data) @@ -776,7 +752,7 @@ normalize <- function(object, } object@cell.data$nUMI = nUMI object@cell.data$nGene = nGene - + for (i in 1:length(object@raw.data)){ if (!object@raw.data[[i]]$exists("cell.data")) { cell.data.i = object@cell.data[object@cell.data$dataset == names(object@raw.data)[i], ] @@ -784,7 +760,7 @@ normalize <- function(object, object@raw.data[[i]][["cell.data"]] = cell.data.i } } - + names(object@norm.data) = names(object@raw.data) } else { if (remove.missing) { @@ -810,11 +786,7 @@ normalize <- function(object, #' Should call normalize and selectGenes before calling. #' @param chunk size of chunks in hdf5 file. (default 1000) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with scale.data slot set. -#' -#' @import hdf5r - calcGeneVars = function (object, chunk = 1000, verbose = TRUE) { hdf5_files = names(object@raw.data) @@ -826,14 +798,14 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = object@h5file.info[[i]][["genes"]]$dims num_entries = object@h5file.info[[i]][["data"]]$dims - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_vars = rep(0,num_genes) gene_means = object@raw.data[[i]][["gene_means"]][] gene_num_pos = rep(0,num_genes) - + num_chunks = ceiling(num_cells/chunk_size) if (verbose) { pb = txtProgressBar(0, num_chunks, style = 3) @@ -848,7 +820,7 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) row_inds = object@h5file.info[[i]][["indices"]][(prev_end_ind+1):(tail(start_inds, 1))] counts = object@norm.data[[i]][(prev_end_ind+1):(tail(start_inds, 1))] norm.data = sparseMatrix(i=row_inds[1:length(counts)]+1,p=start_inds[1:(chunk_size+1)]-prev_end_ind,x=counts,dims=c(num_genes,chunk_size)) - + num_read = length(counts) prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + num_read @@ -856,7 +828,7 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) gene_vars = gene_vars + sumSquaredDeviations(norm.data,gene_means) if (verbose) { setTxtProgressBar(pb, ind) - } + } } if (verbose) { setTxtProgressBar(pb, num_chunks) @@ -899,32 +871,23 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) #' Selected genes are plotted in green. (default FALSE) #' @param cex.use Point size for plot. #' @param chunk size of chunks in hdf5 file. (default 1000) -#' @param unshared.features Whether to consider unshared features +#' @param unshared Whether to consider unshared features (Default FALSE) #' @param unshared.datasets A list of the datasets to consider unshared features for, i.e. list(2), to use the second dataset #' @param unshared.thresh A list of threshold values to apply to each unshared dataset. If only one value is provided, it will apply to all unshared #' datasets. If a list is provided, it must match the length of the unshared datasets submitted. #' @return \code{liger} object with var.genes slot set. -#' -#' @import hdf5r #' @importFrom stats optimize #' @importFrom graphics abline plot points title #' @importFrom stats qnorm #' #' @export #' @examples -#' \dontrun{ -#' # Given datasets Y and Z -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) -#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) -#' # select a smaller subset of genes -#' ligerex <- selectGenes(ligerex, var.thresh = 0.3) -#' } - selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes = NULL, tol = 0.0001, datasets.use = 1:length(object@raw.data), combine = "union", - capitalize = FALSE, do.plot = FALSE, cex.use = 0.3, chunk=1000, unshared = F, unshared.datasets = NULL, unshared.thresh = NULL) + capitalize = FALSE, do.plot = FALSE, cex.use = 0.3, chunk=1000, unshared = FALSE, unshared.datasets = NULL, unshared.thresh = NULL) { if (class(object@raw.data[[1]])[1] == "H5File") { if (!object@raw.data[[1]]$exists("gene_vars")) { @@ -941,14 +904,14 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } else { genes = object@h5file.info[[i]][["genes"]][] } - + if (capitalize) { genes = toupper(genes) } trx_per_cell = object@raw.data[[i]][["cell_sums"]][] gene_expr_mean = object@raw.data[[i]][["gene_means"]][] gene_expr_var = object@raw.data[[i]][["gene_vars"]][] - + names(gene_expr_mean) <- names(gene_expr_var) <- genes # assign gene names nolan_constant <- mean((1/trx_per_cell)) alphathresh.corrected <- alpha.thresh/length(genes) @@ -978,7 +941,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genes.use <- intersect(genes.use, genes.new) } } - + for (i in 1:length(hdf5_files)) { if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ genes = object@h5file.info[[i]][["genes"]][]$index @@ -987,7 +950,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } genes.use <- genes.use[genes.use %in% genes] } - + if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", immediate. = TRUE) @@ -1021,7 +984,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genemeanupper <- gene_expr_mean + qnorm(1 - alphathresh.corrected / 2) * sqrt(gene_expr_mean * nolan_constant / ncol(object@raw.data[[i]])) basegenelower <- log10(gene_expr_mean * nolan_constant) - + num_varGenes <- function(x, num.genes.des){ # This function returns the difference between the desired number of genes and # the number actually obtained when thresholded on x @@ -1029,7 +992,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes log10(gene_expr_var) > basegenelower + x)) return(abs(num.genes.des - y)) } - + if (!is.null(num.genes)) { # Optimize to find value of x which gives the desired number of genes for this dataset # if very small number of genes requested, var.thresh may need to exceed 1 @@ -1041,19 +1004,19 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes optimized$objective, ". Lower tol or alpha.thresh for better results.")) } } - + genes.new <- names(gene_expr_var)[which(gene_expr_var / nolan_constant > genemeanupper & log10(gene_expr_var) > basegenelower + var.thresh[i])] - + if (do.plot) { graphics::plot(log10(gene_expr_mean), log10(gene_expr_var), cex = cex.use, xlab='Gene Expression Mean (log10)', ylab='Gene Expression Variance (log10)') - + graphics::points(log10(gene_expr_mean[genes.new]), log10(gene_expr_var[genes.new]), cex = cex.use, col = "green") graphics::abline(log10(nolan_constant), 1, col = "purple") - + legend("bottomright", paste0("Selected genes: ", length(genes.new)), pch = 20, col = "green") graphics::title(main = names(object@raw.data)[i]) } @@ -1067,11 +1030,11 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genes.use <- intersect(genes.use, genes.new) } } - + for (i in 1:length(object@raw.data)) { genes.use <- genes.use[genes.use %in% rownames(object@raw.data[[i]])] } - + if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", immediate. = TRUE) @@ -1079,7 +1042,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes object@var.genes <- genes.use } # Only for unshared Features - if (unshared == T) { + if (isTRUE(unshared)) { ind.thresh = c() # If only one threshold is provided, apply to all unshared datasets if(length(unshared.thresh == 1)){ @@ -1087,7 +1050,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } else{ # If thresholds are provided for every dataset, use the respective threshold for each datatset if (length(unshared.thresh) != length(unshared.datasets)) { warning("The number of thresholds does not match the number of datasets; Please provide either a single threshold value or a value for each unshared dataset.", - immediate. = T) + immediate. = TRUE) } names(unshared.thresh) = unshared.datasets for (i in unshared.datasets){ @@ -1095,17 +1058,17 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } } unshared.feats <- c() - + for (i in 1:length(object@raw.data)){ unshared.feats[i] <- list(NULL) } - + #construct a list of shared features shared_names = rownames(object@raw.data[[1]]) for (matrix in 2:length(object@raw.data)){ shared_names = subset(shared_names, shared_names %in% rownames(object@raw.data[[i]])) } - + for (i in unshared.datasets){ unshared.use <- c() #Provides normalized subset of unshared features @@ -1153,22 +1116,13 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes #' (default TRUE). #' @param chunk size of chunks in hdf5 file. (default 1000) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with scale.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Given datasets Y and Z -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) -#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' } - scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose = TRUE) { if (class(object@raw.data[[1]])[1] == "H5File") { hdf5_files = names(object@raw.data) @@ -1178,7 +1132,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose message(hdf5_files[i]) } chunk_size = chunk - + if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ genes = object@raw.data[[i]][["raw.var"]][]$index } else { @@ -1187,14 +1141,14 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = length(genes) num_entries = object@h5file.info[[i]][["data"]]$dims - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_vars = rep(0,num_genes) gene_means = object@raw.data[[i]][["gene_means"]][1:num_genes] gene_sum_sq = object@raw.data[[i]][["gene_sum_sq"]][1:num_genes] - + gene_inds = which(genes %in% vargenes) gene_root_mean_sum_sq = sqrt(gene_sum_sq/(num_cells-1)) safe_h5_create(object = object, idx = i, dataset_name = "scale.data", dims = c(length(vargenes), num_cells), mode = h5types$double, chunk_size = c(length(vargenes), chunk_size)) @@ -1226,7 +1180,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose prev_end_data = prev_end_data + num_read prev_end_ind = tail(start_inds, 1) if (verbose) { - setTxtProgressBar(pb, ind) + setTxtProgressBar(pb, ind) } } object@scale.data[[i]] = object@raw.data[[i]][["scale.data"]] @@ -1238,13 +1192,13 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose names(object@scale.data) <- names(object@raw.data) } else { object@scale.data <- lapply(1:length(object@norm.data), function(i) { - scaleNotCenterFast(t(object@norm.data[[i]][object@var.genes, ])) + scaleNotCenterFast(t(object@norm.data[[i]][object@var.genes, , drop = FALSE])) }) # TODO: Preserve sparseness later on (convert inside optimizeALS) object@scale.data <- lapply(object@scale.data, function(x) { as.matrix(x) }) - + names(object@scale.data) <- names(object@norm.data) for (i in 1:length(object@scale.data)) { object@scale.data[[i]][is.na(object@scale.data[[i]])] <- 0 @@ -1257,12 +1211,12 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose if (length(object@var.unshared.features) != 0){ for (i in 1:length(object@raw.data)){ if (!is.null(object@var.unshared.features[[i]])){ - if (class(object@raw.data[[i]])[1] == "dgTMatrix" | + if (class(object@raw.data[[i]])[1] == "dgTMatrix" || class(object@raw.data[[i]])[1] == "dgCMatrix") { - object@scale.unshared.data[[i]] <- scaleNotCenterFast(t(object@norm.data[[i]][object@var.unshared.features[[i]], ])) + object@scale.unshared.data[[i]] <- scaleNotCenterFast(t(object@norm.data[[i]][object@var.unshared.features[[i]],])) object@scale.unshared.data[[i]] <- as.matrix(object@scale.unshared.data[[i]]) } else { - object@scale.unshared.data[[i]] <- scale(t(object@norm.data[[i]][object@var.unshared.features[[i]], ]), center = F, scale = T) + object@scale.unshared.data[[i]] <- scale(t(object@norm.data[[i]][object@var.unshared.features[[i]], ]), center = FALSE, scale = TRUE) } #names(object@scale.unshared.data) <- names(object@norm.data) object@scale.unshared.data[[i]][is.na(object@scale.unshared.data[[i]])] <- 0 @@ -1290,14 +1244,15 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose #' #' @export #' @examples -#' \dontrun{ -#' # liger object: ligerex -#' ligerex <- removeMissingObs(ligerex) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (any(rowSums(ctrl) == 0) || any(rowSums(stim) == 0)) { +#' # example datasets do not have missing data, thus put in a condition +#' # Though the function will return unchanged object if no missing found +#' ligerex <- removeMissingObs(ligerex) #' } - removeMissingObs <- function(object, slot.use = "raw.data", use.cols = TRUE, verbose = TRUE) { filter.data <- slot(object, slot.use) - removed <- ifelse(((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE)) | + removed <- ifelse(((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE)) | ((slot.use == "scale.data") & (use.cols == FALSE)) , yes = "cells", no = "genes") expressed <- ifelse(removed == "cells", yes = " any genes", no = "") @@ -1351,7 +1306,7 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed set.seed(seed) if(is.null(datasets.use)) { - datasets.use = names(object@H) + datasets.use = names(object@raw.data) if (verbose) { message(datasets.use) } @@ -1422,19 +1377,14 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed #' @param genes.use samples from only the specified genes. Default is NULL (all genes) #' @param rand.seed for reproducibility (default 1). #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with sample.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Only for online liger object (based on HDF5 files) -#' # Example: sample a total amount of 5000 cells from norm.data for downstream analysis -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (length(ligerex@H) > 0) { +#' # Downsampling is calculated basing on factorization result +#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 100) #' } - readSubset <- function(object, slot.use = "norm.data", balance = NULL, @@ -1453,10 +1403,10 @@ readSubset <- function(object, datasets.use=names(object@H) } cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, seed = rand.seed, verbose = verbose) - + hdf5_files = names(object@raw.data) #vargenes = object@var.genes - + # find the intersect of genes from each input datasets genes = c() if (slot.use != "scale.data"){ @@ -1471,12 +1421,12 @@ readSubset <- function(object, } else { genes = object@var.genes } - + if(is.null(genes.use)) { genes.use = genes } - + for (i in 1:length(hdf5_files)) { if (verbose) { message(hdf5_files[i]) @@ -1496,20 +1446,20 @@ readSubset <- function(object, } num_cells = length(barcodes) num_genes = length(genes) - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 - - + + #gene_inds = which(genes %in% vargenes) - + num_chunks = ceiling(num_cells/chunk_size) if (verbose) { pb = txtProgressBar(0, num_chunks, style = 3) } ind = 0 - + while (prev_end_col < num_cells) { ind = ind + 1 if (num_cells - prev_end_col < chunk_size) { @@ -1532,7 +1482,7 @@ readSubset <- function(object, use_these = intersect(colnames(one_chunk),cell_inds[[i]]) one_chunk = one_chunk[genes.use,use_these] data.subset = cbind(data.subset,one_chunk) - + num_read = length(counts) prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + num_read @@ -1545,7 +1495,7 @@ readSubset <- function(object, use_these = intersect(colnames(one_chunk),cell_inds[[i]]) one_chunk = one_chunk[genes.use,use_these] data.subset = cbind(data.subset,one_chunk) - + prev_end_col = prev_end_col + chunk_size if (verbose) { setTxtProgressBar(pb, ind) @@ -1572,7 +1522,7 @@ readSubset <- function(object, datasets.use = names(object@H) } cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, verbose = verbose) - + files = names(object@raw.data) # find the intersect of genes from each input datasets genes = c() @@ -1603,12 +1553,11 @@ readSubset <- function(object, if (verbose) { setTxtProgressBar(pb, i) } + object@sample.data[[i]] = data.subset_i } if (verbose){ cat("\n") } - object@sample.data[[i]] = data.subset_i - object@h5file.info[[i]][["sample.data.type"]] = slot.use } names(object@sample.data) = names(object@raw.data) return(object) @@ -1656,18 +1605,17 @@ readSubset <- function(object, #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with H, W, V, A and B slots set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object -#' # Get factorization using 20 factors and mini-batch of 5000 cells -#' # (default setting, can be adjusted for ideal results) -#' ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (length(ligerex@h5file.info) > 0) { +#' # This function only works for HDF5 based liger object +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # `miniBatch_size` has to be no larger than the number of cells in the smallest dataset +#' ligerex <- online_iNMF(ligerex, miniBatch_size = 100) #' } - online_iNMF <- function(object, X_new = NULL, projection = FALSE, @@ -1691,7 +1639,7 @@ online_iNMF <- function(object, scale.data_prev = object@scale.data cell.data_prev = object@cell.data names(raw.data_prev) = names(object@raw.data) - + # assuming only one new dataset arrives at a time raw.data = c() norm.data = c() @@ -1710,7 +1658,7 @@ online_iNMF <- function(object, object@h5file.info = h5file.info object@scale.data = scale.data object@cell.data = cell.data - + # check whether X_new needs to be processed for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] == "H5File"){ @@ -1718,7 +1666,7 @@ online_iNMF <- function(object, } else { processed = !is.null(X_new[[i]]@scale.data) } - + if (processed) { if (verbose) { cat("New dataset", i, "already preprocessed.", "\n") @@ -1734,8 +1682,8 @@ online_iNMF <- function(object, } } } - - + + object@raw.data = c(raw.data_prev, object@raw.data) object@norm.data = c(norm.data_prev, object@norm.data) object@h5file.info = c(h5file.info_prev, object@h5file.info) @@ -1746,11 +1694,11 @@ online_iNMF <- function(object, object@V = lapply(object@V, t) object@H = lapply(object@H, t) } - + for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] != "H5File") object@scale.data[[i]] = t(object@scale.data[[i]]) } - + ## extract required information and initialize algorithm num_files = length(object@raw.data) # number of total input hdf5 files num_prev_files = 0 # number of input hdf5 files processed in last step @@ -1765,16 +1713,16 @@ online_iNMF <- function(object, cat(num_new_files, "new datasets detected.", "\n") } } - + file_idx = 1:num_files # indices for all input files file_idx_new = (num_prev_files+1):num_files # indices only for new input files file_idx_prev = setdiff(file_idx,file_idx_new) - + vargenes = object@var.genes file_names = names(object@raw.data) gene_names = vargenes # genes selected for analysis num_genes = length(vargenes) # number of the selected genes - + cell_barcodes = list() # cell barcodes for each dataset for (i in file_idx){ cell_barcodes[[i]] = rownames(object@cell.data)[object@cell.data$dataset == file_names[i]] @@ -1782,7 +1730,7 @@ online_iNMF <- function(object, num_cells = unlist(lapply(cell_barcodes, length)) # number of cells in each dataset num_cells_new = num_cells[(num_prev_files+1):num_files] minibatch_sizes = rep(0, num_files) - + for (i in file_idx_new) { minibatch_sizes[i] = round((num_cells[i]/sum(num_cells[file_idx_new])) * miniBatch_size) if (minibatch_sizes[i] > num_cells[i]){ @@ -1791,13 +1739,13 @@ online_iNMF <- function(object, } } minibatch_sizes_orig = minibatch_sizes - + if (!projection) { - + if(!is.null(seed)){ set.seed(seed) } - + # W matrix initialization if (is.null(X_new)) { object@W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) @@ -1817,7 +1765,7 @@ online_iNMF <- function(object, # nrow = num_genes, # ncol = k) } - + # normalize the columns of H_i, H_s matrices for (j in 1:k){ for (i in file_idx){ # normalize columns of dictionaries @@ -1847,13 +1795,13 @@ online_iNMF <- function(object, # A = HiHi^t, B = XiHit A_old = list() B_old = list() - + if (is.null(X_new)) { object@A = rep(list(matrix(0, k, k)), num_new_files) object@B = rep(list(matrix(0, num_genes, k)), num_new_files) A_old = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs B_old = rep(list(matrix(0, num_genes, k)), num_new_files) # save information older than 2 epochs - + } else { object@A[file_idx_prev] = if(!is.null(A.init)) A.init else object@A object@B[file_idx_prev] = if(!is.null(B.init)) B.init else object@B @@ -1862,21 +1810,21 @@ online_iNMF <- function(object, object@A[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) object@B[(num_prev_files+1):num_files] = rep(list(matrix(0, num_genes, k)), num_new_files) A_old[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs - B_old[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs + B_old[(num_prev_files+1):num_files] = rep(list(matrix(0, num_genes, k)), num_new_files) # save information older than 2 epochs } - + iter = 1 epoch = rep(0, num_files) # intialize the number of epoch for each dataset epoch_prev = rep(0, num_files) # intialize the previous number of epoch for each dataset epoch_next = rep(FALSE, num_files) sqrt_lambda = sqrt(lambda) total_time = 0 # track the total amount of time used for the online learning - - + + num_chunks = rep(NULL, num_files) chunk_idx = rep(list(NULL), num_files) all_idx = rep(list(NULL), num_files) - + # chunk permutation for (i in file_idx_new){ num_chunks[i] = ceiling(num_cells[i]/h5_chunk_size) @@ -1887,7 +1835,7 @@ online_iNMF <- function(object, } else { all_idx[[i]] = (1+h5_chunk_size*(chunk_idx[[i]][1]-1)):(num_cells[i]) } - + for (j in chunk_idx[[i]][-1]){ if (j != num_chunks[i]){ all_idx[[i]] = c(all_idx[[i]],(1+h5_chunk_size*(j-1)):(j*h5_chunk_size)) @@ -1896,13 +1844,13 @@ online_iNMF <- function(object, } } } - + total.iters = floor(sum(num_cells_new) * max.epochs / miniBatch_size) if (verbose) { cat("Starting Online iNMF...", "\n") pb <- txtProgressBar(min = 1, max = total.iters+1, style = 3) - } - + } + while(epoch[file_idx_new[1]] < max.epochs) { # track epochs minibatch_idx = rep(list(NULL), num_files) # indices of samples in each dataest used for this iteration @@ -1925,11 +1873,11 @@ online_iNMF <- function(object, } all_idx[[i]] = all_idx[[i]][-1] # remove the first element 0 minibatch_idx[[i]] = c(minibatch_idx[[i]],all_idx[[i]][1:((iter * minibatch_sizes[i]) %% num_cells[i])]) - + } else if ((epoch_prev[i] != epoch[i]) & ((iter * minibatch_sizes[i]) %% num_cells[i] == 0)){ # if current iter finishes this cycle without start a a new cycle epoch_next[i] = TRUE epoch_prev[i] = epoch[i] - + minibatch_idx[[i]] = all_idx[[i]][((((iter-1) * minibatch_sizes[i]) %% num_cells[i]) + 1):num_cells[i]] chunk_idx[[i]] = sample(1:num_chunks[i],num_chunks[i]) all_idx[[i]] = 0 @@ -1952,21 +1900,21 @@ online_iNMF <- function(object, } epoch[file_idx_new[1]] = max.epochs # last epoch } - - + + if (length(minibatch_idx[[file_idx_new[1]]]) == minibatch_sizes_orig[file_idx_new[1]]){ X_minibatch = rep(list(NULL), num_files) for (i in file_idx_new){ X_minibatch[[i]] = object@scale.data[[i]][1:num_genes ,minibatch_idx[[i]]] } - + # update H_i by ANLS Hi_minibatch[[i]] H_minibatch = rep(list(NULL), num_files) for (i in file_idx_new){ H_minibatch[[i]] = solveNNLS(rbind(object@W + object@V[[i]], sqrt_lambda * object@V[[i]]), rbind(X_minibatch[[i]], matrix(0, num_genes, minibatch_sizes[i]))) } - + # updata A and B matrices if (iter == 1){ scale_param = c(rep(0, num_prev_files), rep(0, num_new_files)) @@ -1975,8 +1923,8 @@ online_iNMF <- function(object, } else { scale_param = c(rep(0, num_prev_files), rep((iter - 2) / (iter - 1), num_new_files)) } - - + + if (epoch[file_idx_new[1]] > 0 & epoch_next[file_idx_new[1]] == TRUE){ # remove information older than 2 epochs for (i in file_idx_new){ object@A[[i]] = object@A[[i]] - A_old[[i]] @@ -1990,19 +1938,19 @@ online_iNMF <- function(object, B_old[[i]] = scale_param[i] * B_old[[i]] } } - + for (i in file_idx_new){ object@A[[i]] = scale_param[i] * object@A[[i]] + H_minibatch[[i]] %*% t(H_minibatch[[i]]) / minibatch_sizes[i] # HiHit diag(object@A[[i]])[diag(object@A[[i]])==0] = 1e-15 object@B[[i]] = scale_param[i] * object@B[[i]] + X_minibatch[[i]] %*% t(H_minibatch[[i]]) / minibatch_sizes[i] # XiHit } - - + + # update W, V_i by HALS iter_miniBatch = 1 delta_miniBatch = Inf max_iters_miniBatch = miniBatch_max_iters - + while(iter_miniBatch <= max_iters_miniBatch){ # update W for (j in 1:k){ @@ -2012,10 +1960,10 @@ online_iNMF <- function(object, W_update_numerator = W_update_numerator + object@B[[i]][, j] - (object@W + object@V[[i]]) %*% object@A[[i]][, j] W_update_denominator = W_update_denominator + object@A[[i]][j,j] } - + object@W[, j] = nonneg(object@W[, j] + W_update_numerator / W_update_denominator) } - + # update V_i for (j in 1:k){ for (i in file_idx_new){ @@ -2023,7 +1971,7 @@ online_iNMF <- function(object, ((1 + lambda) * object@A[[i]][j, j])) } } - + iter_miniBatch = iter_miniBatch + 1 } epoch_next = rep(FALSE, num_files) # reset epoch change indicator @@ -2056,15 +2004,15 @@ online_iNMF <- function(object, } colnames(object@H[[i]]) = cell_barcodes[[i]] } - + rownames(object@W) = gene_names colnames(object@W) = NULL - + for (i in file_idx){ rownames(object@V[[i]]) = gene_names colnames(object@V[[i]]) = NULL } - + } else { if (verbose) { cat("Metagene projection", "\n") @@ -2090,7 +2038,7 @@ online_iNMF <- function(object, object@V[[i]] = matrix(0, num_genes, k) } } - + # gene x k -> k x gene & k x cell -> cell x k object@W = t(object@W) object@V = lapply(object@V, t) @@ -2098,7 +2046,7 @@ online_iNMF <- function(object, for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] != "H5File") object@scale.data[[i]] = t(object@scale.data[[i]]) } - + if (!is.null(X_new)){ names(object@scale.data) <- names(object@raw.data) <- c(names(raw.data_prev), names(X_new)) } @@ -2116,7 +2064,7 @@ online_iNMF <- function(object, #' @param x Dense matrix. #' @param eps Threshold. Should be a small positive value. (default 1e-16) #' @return Dense matrix with smallest values equal to eps. - +#' @noRd nonneg <- function(x, eps = 1e-16) { x[x < eps] = eps return(x) @@ -2155,6 +2103,7 @@ nonneg <- function(x, eps = 1e-16) { #' @param V.init Initial values to use for V matrices (default NULL) #' @param rand.seed Random seed to allow reproducible results (default 1). #' @param print.obj Print objective function values after convergence (default FALSE). +#' @param use.unshared Whether to run UANLS method to integrate datasets with previously identified unshared variable genes. Have to run selectGenes with unshared = TRUE and scaleNotCenter it. (default FALSE). #' @param verbose Print progress bar/messages (TRUE by default) #' @param ... Arguments passed to other methods #' @@ -2162,13 +2111,12 @@ nonneg <- function(x, eps = 1e-16) { #' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object (only for objected not based on HDF5 files) -#' # Get factorization using 20 factors and mini-batch of 5000 cells -#' # (default setting, can be adjusted for ideal results) -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) optimizeALS <- function( object, ... @@ -2179,10 +2127,8 @@ optimizeALS <- function( #' @rdname optimizeALS #' @importFrom stats runif #' @importFrom utils setTxtProgressBar txtProgressBar -#' #' @export #' @method optimizeALS list -#' optimizeALS.list <- function( object, k, @@ -2194,7 +2140,6 @@ optimizeALS.list <- function( W.init = NULL, V.init = NULL, use.unshared = FALSE, - lamda.u = NULL, rand.seed = 1, print.obj = FALSE, verbose = TRUE, @@ -2238,7 +2183,7 @@ optimizeALS.list <- function( nrow = k, ncol = g ) - + V <- lapply( X = 1:N, FUN = function(i) { @@ -2249,7 +2194,7 @@ optimizeALS.list <- function( )) } ) - + H <- lapply( X = ns, FUN = function(n) { @@ -2369,7 +2314,7 @@ optimizeALS.list <- function( sep = "" ) } - + if (verbose) { if (print.obj) { cat("Objective:", obj, "\n") @@ -2410,8 +2355,8 @@ optimizeALS.liger <- function( verbose = TRUE, ... ) { - - if (use.unshared == FALSE){ + + if (isFALSE(use.unshared)){ object <- removeMissingObs( object = object, slot.use = 'scale.data', @@ -2444,7 +2389,7 @@ optimizeALS.liger <- function( object@parameters$lambda <- lambda return(object) } - if(use.unshared == TRUE){ + if(isTRUE(use.unshared)){ object <- optimize_UANLS(object = object, k = k, lambda = lambda, @@ -2478,11 +2423,16 @@ optimizeALS.liger <- function( #' #' @export #' @examples -#' \dontrun{ -#' # decide to run with k = 15 instead (keeping old lambda the same) -#' ligerex <- optimizeNewK(ligerex, k.new = 15) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' k <- 5 +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = k, max.iters = 1) +#' if (k != 5) { +#' ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 1) #' } - optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { if (is.null(lambda)) { @@ -2495,7 +2445,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters H <- object@H W <- object@W V <- object@V - + if (k.new > k) { set.seed(rand.seed) sqrt_lambda <- sqrt(lambda) @@ -2559,7 +2509,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters }) } object <- optimizeALS(object, k.new, - lambda = lambda, thresh = thresh, max.iters = max.iters, H.init = H, + lambda = lambda, thresh = thresh, max.iters = max.iters, H.init = H, W.init = W, V.init = V, rand.seed = rand.seed, verbose = verbose) return(object) } @@ -2587,19 +2537,30 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters #' #' @export #' @examples -#' \dontrun{ -#' # Given preprocessed liger object: ligerex (contains two datasets Y and Z) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -#' # acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets -#' new_data <- list(Y_set = Y_new, Z_set = Z_new) -#' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example test time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' # Suppose we have new data, namingly Y_new and Z_new from the same cell type. +#' # Add it to existing datasets. +#' new_data <- list(Y_set = ctrl, Z_set = stim) +#' # 2 iters do not lead to converge, it's for minimal test time +#' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, +#' which.datasets = list('ctrl', 'stim'), +#' max.iters = 1) #' # acquire new data from different cell type (X), we'll just add another dataset -#' # it's probably most similar to y_set -#' ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), -#' add.to.existing = FALSE) +#' # it's probably most similar to ctrl +#' X <- ctrl +#' # 2 iters do not lead to converge, it's for minimal test time +#' ligerex3 <- optimizeNewData(ligerex, new.data = list(x_set = X), +#' which.datasets = list('ctrl'), +#' add.to.existing = FALSE, +#' max.iters = 1) #' } - optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = TRUE, lambda = NULL, thresh = 1e-4, max.iters = 100, verbose = TRUE) { if (is.null(lambda)) { @@ -2699,12 +2660,22 @@ optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = #' #' @export #' @examples -#' \dontrun{ -#' # now want to look at only subset of data -#' # Requires a vector of cell names from data 1 and a vector of cell names from data 2 -#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example run time, not converging. +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' # Preparing subset with random sampling. +#' # Subset can also be obtained with prior knowledge from metadata. +#' cell_names_1 <- sample(rownames(ligerex@H[[1]]), 20) +#' cell_names_2 <- sample(rownames(ligerex@H[[2]]), 20) +#' +#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2), +#' max.iters = 1) #' } - optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, lambda = NULL, thresh = 1e-4, max.iters = 100, datasets.scale = NULL) { if (is.null(lambda)) { @@ -2726,7 +2697,8 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la object@raw.data <- lapply(1:length(object@raw.data), function(i) { object@raw.data[[i]][, cell.subset[[i]]] }) - object@cell.data <- droplevels(object@cell.data[cell.subset, ]) + all.cell.subset <- Reduce(c, cell.subset) + object@cell.data <- droplevels(object@cell.data[all.cell.subset, ]) for (i in 1:length(object@norm.data)) { object@norm.data[[i]] <- object@norm.data[[i]][, cell.subset[[i]]] if (names(object@norm.data)[i] %in% datasets.scale) { @@ -2734,10 +2706,10 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la scale = TRUE, center = FALSE) object@scale.data[[i]][is.na(object@scale.data[[i]])] <- 0 } else { - object@scale.data[[i]] <- t(object@norm.data[[i]][object@var.genes, ]) + object@scale.data[[i]] <- as.matrix(t(object@norm.data[[i]][object@var.genes, ])) } } - + names(object@raw.data) <- names(object@norm.data) <- names(object@H) <- old_names k <- ncol(H[[1]]) object <- optimizeALS(object, k = k, lambda = lambda, thresh = thresh, max.iters = max.iters, @@ -2763,11 +2735,17 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la #' #' @export #' @examples -#' \dontrun{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example run time, not converging. +#' ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 1) #' # decide to run with lambda = 15 instead (keeping k the same) -#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) +#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 1) #' } - optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { k <- ncol(object@H[[1]]) H <- object@H @@ -2811,24 +2789,22 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' or dataframe used to produce ggplot object. Raw data is matrix of alignment values for each #' lambda value tested (each column represents a different rep for nrep).(default FALSE) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return Matrix of results if indicated or ggplot object. Plots alignment vs. lambda to console. -#' #' @import doParallel #' @import parallel #' @importFrom foreach foreach #' @importFrom foreach "%dopar%" #' @importFrom ggplot2 ggplot aes geom_point geom_line guides guide_legend labs theme theme_classic -#' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object -#' # examine plot for most appropriate lambda, use multiple cores for faster results -#' suggestLambda(ligerex, k = 20, num.cores = 4) +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) #' } - -suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, +suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { if (is.null(lambda.test)) { @@ -2888,26 +2864,26 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core parallel::stopCluster(cl) rep_data[[r]] <- data_matrix } - + aligns <- Reduce(cbind, rep_data) if (is.null(dim(aligns))) { aligns <- matrix(aligns, ncol = 1) } mean_aligns <- apply(aligns, 1, mean) - + time_elapsed <- difftime(Sys.time(), time_start, units = "auto") if (verbose) { cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) } # make dataframe df_al <- data.frame(align = mean_aligns, lambda = lambda.test) - + p1 <- ggplot(df_al, aes_string(x = 'lambda', y = 'mean_aligns')) + geom_line(size=1) + geom_point() + theme_classic() + labs(y = 'Alignment', x = 'Lambda') + guides(col = guide_legend(title = "", override.aes = list(size = 2))) + theme(legend.position = 'top') - + if (return.data) { print(p1) if (return.raw) { @@ -2960,12 +2936,13 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core #' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object -#' # examine plot for most appropriate k, use multiple cores for faster results -#' suggestK(ligerex, num.cores = 4) +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' suggestK(ligerex, k.test = c(5,6), max.iters = 1) #' } - suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, max.iters = 100, num.cores = 1, rand.seed = 1, gen.new = FALSE, nrep = 1, plot.log2 = TRUE, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -3021,13 +2998,13 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, data_matrix <- data_matrix[nrow(data_matrix):1, ] rep_data[[r]] <- data_matrix } - + medians <- Reduce(cbind, lapply(rep_data, function(x) {apply(x, 1, median)})) if (is.null(dim(medians))) { medians <- matrix(medians, ncol = 1) } mean_kls <- apply(medians, 1, mean) - + time_elapsed <- difftime(Sys.time(), time_start, units = "auto") if (verbose) { cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) @@ -3038,13 +3015,13 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, if (!plot.log2) { df_kl <- df_kl[df_kl$calc == 'KL_div', ] } - + p1 <- ggplot(df_kl, aes_string(x = 'k', y = 'median_kl', col = 'calc')) + geom_line(size=1) + geom_point() + theme_classic() + labs(y='Median KL divergence (across all cells)', x = 'K') + guides(col=guide_legend(title="", override.aes = list(size = 2))) + theme(legend.position = 'top') - + if (return.data) { print(p1) if (return.raw) { @@ -3095,21 +3072,15 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, #' @param ... Arguments passed to other methods #' #' @return \code{liger} object with 'H.norm' and 'clusters' slot set. -#' #' @importFrom stats approxfun -#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # do basic quantile alignment +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # higher resolution for more clusters (note that SNF is conserved) -#' ligerex <- quantile_norm(ligerex, resolution = 1.2) -#' # change knn_k for more fine-grained local clustering -#' ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) -#' } - quantile_norm <- function( object, ... @@ -3157,7 +3128,7 @@ quantile_norm.list <- function( labels <- lapply(object, max_factor, dims_use = use_these_factors, center_cols = do.center) clusters <- as.factor(unlist(lapply(labels, as.character))) names(clusters) <- unlist(lapply(object, rownames)) - + # increase robustness of cluster assignments using knn graph if (refine.knn) { clusters <- refine_clusts_knn(object, clusters, k = knn_k, eps = eps) @@ -3168,7 +3139,7 @@ quantile_norm.list <- function( }) names(clusters) <- names(object) dims <- ncol(object[[ref_dataset]]) - + dataset <- unlist(lapply(1:length(object), function(i) { rep(names(object)[i], nrow(object[[i]])) })) @@ -3212,7 +3183,6 @@ quantile_norm.list <- function( #' @rdname quantile_norm #' @export #' @method quantile_norm liger -#' quantile_norm.liger <- function( object, quantiles = 50, @@ -3272,29 +3242,30 @@ quantile_norm.liger <- function( #' @param random.seed Seed of the random number generator. (default 1) #' @param verbose Print messages (TRUE by default) #' @param dims.use Indices of factors to use for Louvain clustering (default 1:ncol(H[[1]])). -#' #' @return \code{liger} object with refined 'clusters' slot set. -#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' ligerex <- louvainCluster(ligerex, resulotion = 0.3) -#' } -#' - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex, resolution = 0.3) louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps = 0.1, nRandomStarts = 10, nIterations = 100, random.seed = 1, verbose = TRUE, dims.use = NULL) { + tmpdir <- tempdir() output_path <- paste0('edge_', sub('\\s', '_', Sys.time()), '.txt') output_path = sub(":","_",output_path) output_path = sub(":","_",output_path) - + output_path <- file.path(tmpdir, output_path) + if (is.null(dims.use)) { use_these_factors <- 1:ncol(object@H[[1]]) } else { use_these_factors <- dims.use } - + if (dim(object@H.norm)[1] == 0){ if (verbose) { message("Louvain Clustering on unnormalized cell factor loadings.") @@ -3398,19 +3369,19 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = FALSE) #' @export #' @examples #' \dontrun{ +#' # Only runable for ATAC dataset. See tutorial on GitHub. #' # ligerex (liger object), factorization complete #' # impute every dataset other than the reference dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) #' # impute only z_set dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", queries = list("z_set"), knn_k = 50) #' } - imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, norm = TRUE, scale = FALSE, verbose = TRUE) { if (verbose) { cat("NOTE: This function will discard the raw data previously stored in the liger object and", "replace the raw.data slot with the imputed data.\n\n") } - + if (length(reference) > 1) { stop("Can only have ONE reference dataset") } @@ -3444,15 +3415,15 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } } } - + reference_cells <- colnames(object@raw.data[[reference]]) # cells by genes for (query in queries) { query_cells <- colnames(object@raw.data[[query]]) - + # creating a (reference cell numbers X query cell numbers) weights matrix for knn weights and unit weights nn.k <- get.knnx(object@H.norm[reference_cells, ], object@H.norm[query_cells, ], k = knn_k, algorithm = "CR") weights <- Matrix(0, nrow = ncol(object@raw.data[[reference]]), ncol = nrow(nn.k$nn.index), sparse = TRUE) - if (weight == TRUE){ # for weighted situation + if (isTRUE(weight)){ # for weighted situation # find nearest neighbors for query cell in normed ref datasets for (n in 1:nrow(nn.k$nn.index)) { # record ref-query cell-cell distances weights[nn.k$nn.index[n, ], n] <- exp(-nn.k$nn.dist[n, ]) / sum(exp(-nn.k$nn.dist[n, ])) @@ -3463,13 +3434,13 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor weights[nn.k$nn.index[n, ], n] <- 1/knn_k # simply count the mean } } - + # (genes by ref cell num) multiply by the weight matrix (ref cell num by query cell num) imputed_vals <- object@raw.data[[reference]] %*% weights # assigning dimnames colnames(imputed_vals) <- query_cells rownames(imputed_vals) <- rownames(object@raw.data[[reference]]) - + # formatiing the matrix if (class(object@raw.data[[reference]])[1] == "dgTMatrix" | class(object@raw.data[[reference]])[1] == "dgCMatrix") { @@ -3477,10 +3448,10 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } else { imputed_vals <- as.matrix(imputed_vals) } - + object@raw.data[[query]] <- imputed_vals } - + if (norm) { if (verbose) { cat('\nNormalizing data...\n') @@ -3493,7 +3464,7 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } object <- rliger::scaleNotCenter(object) } - + return(object) } @@ -3509,25 +3480,24 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") -#' wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) -#' # HDF5 input -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete -#' # Need to sample cells before implementing Wilcoxon test -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) -#' de_genes <- runWilcoxon(ligerex, compare.method = "clusters") +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex, resolution = 0.3) +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "datasets", data.use = c(1, 2)) +#' if (length(ligerex@h5file.info) > 0) { +#' # For HDF5 based object +#' # Need to sample cells and read into memory before running Wilcoxon test +#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") #' } - -runWilcoxon <- function(object, data.use = "all", compare.method) { +runWilcoxon <- function(object, data.use = "all", compare.method = c("clusters", "datasets")) { # check parameter inputs - if (missing(compare.method)) { - stop("Parameter *compare.method* cannot be empty!") - } - if (compare.method != "datasets" & compare.method != "clusters") { - stop("Parameter *compare.method* should be either *clusters* or *datasets*.") - } + compare.method <- match.arg(compare.method) if (compare.method == "datasets") { if (length(names(object@norm.data)) < 2) { stop("Should have at least TWO inputs to compare between datasets") @@ -3536,7 +3506,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { stop("Should have at least TWO inputs to compare between datasets") } } - + if (class(object@raw.data[[1]])[1] == "H5File"){ if (is.null(object@h5file.info[[1]][["sample.data.type"]])){ message("Need to sample data before Wilcoxon test for HDF5 input.") @@ -3544,7 +3514,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { message("Running Wilcoxon test on ", object@h5file.info[[1]][["sample.data.type"]]) } } - + ### create feature x sample matrix if (data.use[1] == "all" | length(data.use) > 1) { # at least two datasets if (data.use[1] == "all") { @@ -3588,20 +3558,20 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { clusters <- object@clusters[colnames(object@sample.data[[data.use]]), drop = TRUE] # from which cluster } } - + ### perform wilcoxon test if (compare.method == "clusters") { # compare between clusters across datasets len <- nrow(feature_matrix) if (len > 100000) { message("Calculating Large-scale Input...") results <- Reduce(rbind, lapply(suppressWarnings(split(seq(len), seq(len / 100000))), function(index) { - wilcoxauc(log(feature_matrix[index, ] + 1e-10), clusters) + wilcoxauc(log1p(1e6*feature_matrix[index, ]), clusters) })) } else { - results <- wilcoxauc(log(feature_matrix + 1e-10), clusters) + results <- wilcoxauc(log1p(1e6*feature_matrix), clusters) } } - + if (compare.method == "datasets") { # compare between datasets within each cluster results <- Reduce(rbind, lapply(levels(clusters), function(cluster) { sub_barcodes <- names(clusters[clusters == cluster]) # every barcode within this cluster @@ -3611,7 +3581,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { message("Note: Skip Cluster ", cluster, " since it has only ONE data source.") return() } - return(wilcoxauc(log(sub_matrix + 1e-10), sub_label)) + return(wilcoxauc(log1p(1e6*sub_matrix), sub_label)) })) } return(results) @@ -3640,12 +3610,12 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { #' @export #' @examples #' \dontrun{ +#' # Only runable for ATAC datasets, see tutorial on GitHub #' # some gene counts matrix: gmat.small -#' # some peak counts matrix: pmat.small +#' # some peak counts matrix: pmat.small #' regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", #' alpha = 0.05, path_to_coords = 'some_path') #' } - linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist = "spearman", alpha = 0.05, path_to_coords, verbose = TRUE) { ## check dependency @@ -3655,14 +3625,14 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist call. = FALSE ) } - + if (!requireNamespace("IRanges", quietly = TRUE)) { stop("Package \"IRanges\" needed for this function to work. Please install it by command:\n", "BiocManager::install('IRanges')", call. = FALSE ) } - + ### make Granges object for peaks peak.names <- strsplit(rownames(peak_counts), "[:-]") chrs <- Reduce(append, lapply(peak.names, function(peak) { @@ -3678,7 +3648,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist seqnames = chrs, ranges = IRanges::IRanges(as.numeric(chrs.start), end = as.numeric(chrs.end)) ) - + ### make Granges object for genes gene.names <- read.csv2(path_to_coords, sep = "\t", header = FALSE, stringsAsFactors = FALSE) gene.names <- gene.names[complete.cases(gene.names), ] @@ -3687,11 +3657,11 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist ranges = IRanges::IRanges(as.numeric(gene.names$V2), end = as.numeric(gene.names$V3)) ) names(genes.coords) <- gene.names$V4 - + ### construct regnet gene_counts <- t(gene_counts) # cell x genes peak_counts <- t(peak_counts) # cell x genes - + # find overlap peaks for each gene if (missing(genes.list)) { genes.list <- colnames(gene_counts) @@ -3702,13 +3672,13 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist } genes.list <- genes.list[!missing_genes] genes.coords <- genes.coords[genes.list] - + if (verbose) { message("Calculating correlation for gene-peak pairs...") } each.len <- 0 # assign('each.len', 0, envir = globalenv()) - + elements <- lapply(seq(length(genes.list)), function(pos) { gene.use <- genes.list[pos] # re-scale the window for each gene @@ -3729,7 +3699,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist )) pick <- res[["p"]] < alpha # filter by p-value pick[is.na(pick)] <- FALSE - + if (sum(pick) == 0) { # if no peaks are important, skip this iteration return(list(NULL, as.numeric(each.len), NULL)) } @@ -3741,7 +3711,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist assign('each.len', each.len + length(peaks.use), envir = parent.frame(2)) return(list(as.numeric(peaks.use), as.numeric(each.len), res.corr)) }) - + i_index <- Reduce(append, lapply(elements, function(ele) { ele[[1]] })) @@ -3751,14 +3721,14 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist value_list <- Reduce(append, lapply(elements, function(ele) { ele[[3]] })) - + # make final sparse matrix regnet <- sparseMatrix( i = i_index, p = p_index, x = value_list, dims = c(ncol(peak_counts), length(genes.list)), dimnames = list(colnames(peak_counts), genes.list) ) - + return(regnet) } @@ -3775,23 +3745,23 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist #' @param path_to_coords Path to the gene coordinates file. #' #' @return An Interact Track file stored in the specified path. -#' +#' #' @importFrom stats complete.cases #' @importFrom utils write.table #' #' @export #' @examples #' \dontrun{ -#' # some gene-peak correlation matrix: regent +#' # Only runable for ATAC datasets, see tutorial on GitHub +#' # some gene-peak correlation matrix: regent #' makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') #' } - makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) { # get genomic coordinates if (missing(path_to_coords)) { stop("Parameter 'path_to_coords' cannot be empty.") } - + ### make Granges object for genes genes.coords <- read.csv2(path_to_coords, sep = "\t", header = FALSE, colClasses = @@ -3799,7 +3769,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) ) genes.coords <- genes.coords[complete.cases(genes.coords$V4), ] rownames(genes.coords) <- genes.coords[, 4] - + # split peak names into chrom and coordinates peak.names <- strsplit(rownames(corr.mat), "[:-]") chrs <- Reduce(append, lapply(peak.names, function(peak) { @@ -3811,25 +3781,25 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) chrs.end <- as.numeric(Reduce(append, lapply(peak.names, function(peak) { peak[3] }))) - + # check genes.list if (missing(genes.list)) { genes.list <- colnames(corr.mat) } - + # check output_path if (missing(output_path)) { output_path <- getwd() } - + output_path <- paste0(output_path, "/Interact_Track.bed") track.doc <- paste0('track type=interact name="Interaction Track" description="Gene-Peaks Links"', ' interactDirectional=true maxHeightPixels=200:100:50 visibility=full') write(track.doc, file = output_path) - + genes_not_existed <- 0 filtered_genes <- 0 - + for (gene in genes.list) { if (!gene %in% colnames(corr.mat)) { # if gene not in the corr.mat genes_not_existed <- genes_not_existed + 1 @@ -3840,7 +3810,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) filtered_genes <- filtered_genes + 1 next } - + track <- data.frame( chrom = chrs[peaks.sel], chromStart = chrs.start[peaks.sel], @@ -3868,7 +3838,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) fileEncoding = "" ) } - + message("A total of ", genes_not_existed, " genes do not exist in input matrix.") message("A total of ", filtered_genes, " genes do not have significant correlated peaks.") message("The Interaction Track is stored in Path: ", output_path) @@ -3894,12 +3864,15 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' wilcox.results <- runGSEA(ligerex) -#' wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' result <- runGSEA(ligerex) #' } -#' runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gene_sets = c()) { if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) { stop("Package \"org.Hs.eg.db\" needed for this function to work. Please install it by command:\n", @@ -3907,29 +3880,29 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen call. = FALSE ) } - + if (!requireNamespace("reactome.db", quietly = TRUE)) { stop("Package \"reactome.db\" needed for this function to work. Please install it by command:\n", "BiocManager::install('reactome.db')", call. = FALSE ) } - + if (!requireNamespace("fgsea", quietly = TRUE)) { stop("Package \"fgsea\" needed for this function to work. Please install it by command:\n", "BiocManager::install('fgsea')", call. = FALSE ) } - + if (length(mat_v) > length(object@V)) { stop("The gene loading input is invalid.", call. = FALSE) } - + if (!.hasSlot(object, "W") | !.hasSlot(object, "V")) { stop("There is no W or V matrix. Please do iNMF first.", call. = FALSE) } - + if (mat_w) { gene_loadings <- object@W if (mat_v) { @@ -3942,11 +3915,11 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen object@V[[v]] })) } - + gene_ranks <- t(apply(gene_loadings, MARGIN = 1, function(x) { rank(x) })) - + colnames(gene_ranks) <- sapply(colnames(gene_ranks), toupper) gene_id <- as.character(AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db, colnames(gene_ranks), "ENTREZID", "SYMBOL")) colnames(gene_ranks) <- gene_id @@ -4009,16 +3982,14 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # get tsne.coords for normalized data #' ligerex <- runTSNE(ligerex) -#' # get tsne.coords for raw factor loadings -#' ligerex <- runTSNE(ligerex, use.raw = TRUE) -#' } - runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), use.pca = FALSE, perplexity = 30, theta = 0.5, method = "Rtsne", fitsne.path = NULL, rand.seed = 42) { @@ -4089,16 +4060,18 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantileAlignSNF(ligerex) -#' # get tsne.coords for normalized data -#' ligerex <- runUMAP(ligerex) -#' # get tsne.coords for raw factor loadings -#' ligerex <- runUMAP(ligerex, use.raw = TRUE) +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { +#' ligerex <- runUMAP(ligerex) +#' } #' } - runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { set.seed(rand.seed) @@ -4147,13 +4120,13 @@ runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantile_norm(ligerex) -#' dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' calcDatasetSpecificity(ligerex) calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do.plot = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] @@ -4214,17 +4187,14 @@ calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do. #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantile_norm(ligerex) -#' agreement <- calcAgreement(ligerex, dr.method = "NMF") -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) -#' agreement <- calcAgreement(ligerex, dr.method = "NMF") -#' } - +#' agreement <- calcAgreement(ligerex) calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.aligned = TRUE, rand.seed = 42, by.dataset = FALSE) { # if (!requireNamespace("NNLM", quietly = TRUE) & dr.method == "NMF") { @@ -4237,7 +4207,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali stop("HDF5-based Liger object requires sampled scale.data for calculating agreement.") } } - + message("Reducing dimensionality using ", dr.method) set.seed(rand.seed) dr <- list() @@ -4258,7 +4228,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali for (i in 1:length(object@H)){ dr[[i]] = icafast(t(object@sample.data[[i]]), nc = ndims)$S } - + } else { dr <- lapply(object@scale.data, function(x) { icafast(x, nc = ndims)$S @@ -4274,7 +4244,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali )$rotation) rownames(dr[[i]]) = colnames(object@sample.data[[i]]) } - + } else { dr <- lapply(object@scale.data, function(x) { suppressWarnings(prcomp_irlba(t(x), @@ -4291,7 +4261,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali n <- sum(ns) jaccard_inds <- c() distorts <- c() - + for (i in 1:length(dr)) { jaccard_inds_i <- c() if (use.aligned) { @@ -4308,7 +4278,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali })) jaccard_inds_i <- jaccard_inds_i[is.finite(jaccard_inds_i)] jaccard_inds <- c(jaccard_inds, jaccard_inds_i) - + distorts <- c(distorts, mean(jaccard_inds_i)) } if (by.dataset) { @@ -4345,12 +4315,14 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object ), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' alignment <- calcAlignment(ligerex) -#' } - +#' agreement <- calcAlignment(ligerex) calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cells.comp = NULL, clusters.use = NULL, by.cell = FALSE, by.dataset = FALSE) { if (is.null(cells.use)) { @@ -4412,10 +4384,10 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel } names(dataset) <- rownames(nmf_factors) dataset <- dataset[sampled_cells] - + num_sampled <- N * min_cells num_same_dataset <- rep(k, num_sampled) - + alignment_per_cell <- c() for (i in 1:num_sampled) { inds <- knn_graph$nn.index[i, ] @@ -4442,7 +4414,7 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel #' #' Returns alignment for each cluster in analysiss (see documentation for calcAlignment). #' -#' @param object \code{liger} object. Should call quantileAlignSNF before calling. +#' @param object \code{liger} object. Should call quantile_norm before calling. #' @param rand.seed Random seed for reproducibility (default 1). #' @param k Number of nearest neighbors in calculating alignment (see calcAlignment for default). #' Can pass in single value or vector with same length as number of clusters. @@ -4454,13 +4426,14 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # get alignment for each cluster -#' alignment_per_cluster <- calcAlignmentPerCluster(ligerex) -#' } - +#' agreement <- calcAlignmentPerCluster(ligerex) calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset = FALSE) { clusters <- levels(object@clusters) if (typeof(k) == "double") { @@ -4502,20 +4475,14 @@ calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -#' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -#' names(cluster2) <- colnames(ligerex@raw.data[[2]]) -#' # get ARI for first clustering -#' ari1 <- calcARI(ligerex, cluster1) -#' # get ARI for second clustering -#' ari2 <- calcARI(ligerex, cluster2) -#' } - +#' agreement <- calcARI(ligerex, ligerex@clusters) calcARI <- function(object, clusters.compare, verbose = TRUE) { if (length(clusters.compare) < length(object@clusters) && verbose) { message("Calculating ARI for subset of all cells") @@ -4539,27 +4506,21 @@ calcARI <- function(object, clusters.compare, verbose = TRUE) { #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -#' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -#' names(cluster2) <- colnames(ligerex@raw.data[[2]]) -#' # get ARI for first clustering -#' ari1 <- calcPurity(ligerex, cluster1) -#' # get ARI for second clustering -#' ari2 <- calcPurity(ligerex, cluster2) -#' } - +#' agreement <- calcARI(ligerex, ligerex@clusters) calcPurity <- function(object, classes.compare, verbose = TRUE) { if (length(classes.compare) < length(object@clusters) && verbose) { print("Calculating purity for subset of full cells") } clusters <- object@clusters[names(classes.compare)] purity <- sum(apply(table(classes.compare, clusters), 2, max)) / length(clusters) - + return(purity) } @@ -4569,19 +4530,21 @@ calcPurity <- function(object, classes.compare, verbose = TRUE) { #' #' @param object \code{liger} object. #' @param use.norm Whether to use cell normalized data in calculating contribution (default FALSE). -#' +#' @param mito.pattern Regex pattern for identifying mitochondrial genes. Default "^mt-" typically goes for mouse. +#' May use "^MT-" for human. #' @return Named vector containing proportion of mitochondrial contribution for each cell. #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) -#' } - -getProportionMito <- function(object, use.norm = FALSE) { +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' # Expect a warning because the test data does not contain mito genes +#' ligerex@cell.data$mito <- getProportionMito(ligerex, mito.pattern = "^MT-") +getProportionMito <- function(object, use.norm = FALSE, mito.pattern = "^mt-") { all.genes <- Reduce(union, lapply(object@raw.data, rownames)) - mito.genes <- grep(pattern = "^mt-", x = all.genes, value = TRUE) + mito.genes <- grep(pattern = mito.pattern, x = all.genes, value = TRUE) + if (length(mito.genes) == 0) { + warning("No mito genes identified with pattern \"", mito.pattern, "\". ") + } data.use <- object@raw.data if (use.norm) { data.use <- object@norm.data @@ -4589,7 +4552,7 @@ getProportionMito <- function(object, use.norm = FALSE) { percent_mito <- unlist(lapply(unname(data.use), function(x) { colSums(x[mito.genes, ]) / colSums(x) }), use.names = TRUE) - + return(percent_mito) } @@ -4620,28 +4583,27 @@ getProportionMito <- function(object, use.norm = FALSE) { #' @param new.order new dataset factor order for plotting. must set reorder.idents = TRUE. #' @param return.plots Return ggplot plot objects instead of printing directly (default FALSE). #' @param legend.fonts.size Controls the font size of the legend. -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle guides guide_legend aes theme xlab ylab #' @importFrom dplyr %>% group_by summarize #' @importFrom scattermore geom_scattermore #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # get tsne.coords for normalized data +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) -#' # plot to console -#' plotByDatasetAndCluster(ligerex) -#' # return list of plots -#' plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) -#' } - +#' ligerex <- louvainCluster(ligerex) +#' plotByDatasetAndCluster(ligerex, pt.size = 1) plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, axis.labels = NULL, do.legend = TRUE, legend.size = 5, @@ -4657,13 +4619,13 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si raster <- FALSE } } - + tsne_df <- data.frame(object@tsne.coords) colnames(tsne_df) <- c("Dim1", "Dim2") tsne_df[['Dataset']] <- unlist(lapply(1:length(object@H), function(x) { rep(names(object@H)[x], nrow(object@H[[x]])) })) - if (reorder.idents == TRUE){ + if (isTRUE(reorder.idents)){ tsne_df$Dataset <- factor(tsne_df$Dataset, levels = new.order) } c_names <- names(object@clusters) @@ -4683,18 +4645,18 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si idx <- sample(1:nrow(tsne_df)) tsne_df <- tsne_df[idx, ] } - - + + if (isTRUE(x = raster)) { p1 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Dataset')) + theme_bw() + theme_cowplot(legend.fonts.size) + geom_scattermore(pointsize = pt.size) + guides(color = guide_legend(override.aes = list(size = legend.size))) - + centers <- tsne_df %>% group_by(.data[['Cluster']]) %>% summarize( Dim1 = median(x = .data[['Dim1']]), Dim2 = median(x = .data[['Dim2']]) ) - + p2 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Cluster')) + theme_cowplot(legend.fonts.size) + geom_scattermore(pointsize = pt.size) + geom_text(data = centers, mapping = aes_string(label = 'Cluster'), colour = "black", size = text.size) + @@ -4703,19 +4665,19 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si p1 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Dataset')) + theme_bw() + theme_cowplot(legend.fonts.size) + geom_point(size = pt.size, stroke = 0.2) + guides(color = guide_legend(override.aes = list(size = legend.size))) - + centers <- tsne_df %>% group_by(.data[['Cluster']]) %>% summarize( Dim1 = median(x = .data[['Dim1']]), Dim2 = median(x = .data[['Dim2']]) ) - + p2 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Cluster')) + theme_cowplot(legend.fonts.size) + geom_point(size = pt.size, stroke = 0.2) + geom_text(data = centers, mapping = aes_string(label = 'Cluster'), colour = "black", size = text.size) + guides(color = guide_legend(override.aes = list(size = legend.size))) } - - + + if (!is.null(title)) { p1 <- p1 + ggtitle(title[1]) p2 <- p2 + ggtitle(title[2]) @@ -4767,22 +4729,22 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle aes guides guide_legend labs #' scale_color_viridis_c scale_color_gradientn theme xlab ylab #' @importFrom dplyr %>% group_by summarize #' @importFrom stats median -#' +#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # get tsne.coords for normalized data +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) -#' # plot nUMI to console -#' plotFeature(ligerex, feature = 'nUMI') -#' } - +#' plotFeature(ligerex, "nUMI", pt.size = 1) plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, title = NULL, pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, do.labels = FALSE, axis.labels = NULL, do.legend = TRUE, legend.size = 5, option = 'plasma', @@ -4794,7 +4756,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit } dr_df$feature <- object@cell.data[, feature] if (is.null(discrete)) { - if (class(dr_df$feature) != "factor") { + if (!is.factor(dr_df$feature)) { discrete <- FALSE } else { discrete <- TRUE @@ -4816,7 +4778,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit p_list <- list() for (sub_df in split(dr_df, f = dr_df$dataset)) { ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'feature')) + geom_point(size = pt.size) - + # if data is discrete if (discrete) { ggp <- ggp + guides(color = guide_legend(override.aes = list(size = legend.size))) + @@ -4838,7 +4800,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit ggp <- ggp + scale_color_gradientn(colors = cols.use, na.value = zero.color) + labs(col = feature) } - + } if (by.dataset) { base <- as.character(sub_df$dataset[1]) @@ -4860,7 +4822,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit if (by.dataset) { p_list <- p_list[names(object@raw.data)] } - + if (return.plots){ if (length(p_list) == 1) { return(p_list[[1]]) @@ -4898,17 +4860,17 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' # get tsne.coords for normalized data -#' ligerex <- runTSNE(ligerex) -#' # factor plots into pdf file -#' # pdf("plot_factors.pdf") #' plotFactors(ligerex) -#' # dev.off() +#' ligerex <- runTSNE(ligerex) +#' plotFactors(ligerex, plot.tsne = TRUE) #' } - plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = FALSE, verbose = TRUE) { k <- ncol(object@H.norm) if (verbose) { @@ -4919,18 +4881,18 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn Hs_norm <- object@H.norm # restore default settings when the current function exits init_par <- graphics::par(no.readonly = TRUE) - on.exit(graphics::par(init_par)) + on.exit(graphics::par(init_par)) for (i in 1:k) { graphics::par(mfrow = c(2, 1)) top_genes.W <- rownames(W)[order(W[, i], decreasing = TRUE)[1:num.genes]] top_genes.W.string <- paste0(top_genes.W, collapse = ", ") factor_textstring <- paste0("Factor", i) - + plot_title1 <- paste(factor_textstring, "\n", top_genes.W.string, "\n") cols <- rep("gray", times = nrow(Hs_norm)) names(cols) <- rownames(Hs_norm) cols.use <- grDevices::rainbow(length(object@H)) - + for (cl in 1:length(object@H)) { cols[rownames(object@H[[cl]])] <- rep(cols.use[cl], times = nrow(object@H[[cl]])) } @@ -4994,18 +4956,16 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) -#' # pdf('word_clouds.pdf') -#' plotWordClouds(ligerex, num.genes = 20) -#' # dev.off() -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' plotWordClouds(ligerex, num.genes = 20) +#' plotWordClouds(ligerex, do.spec.plot = FALSE) #' } - plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = 30, min.size = 1, max.size = 4, factor.share.thresh = 10, log.fc.thresh = 1, pval.thresh = 0.05, do.spec.plot = TRUE, return.plots = FALSE, verbose = TRUE) { @@ -5013,7 +4973,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] } - + if(class(object@raw.data[[1]])[1] == "H5File"){ sample.idx = unlist(lapply(object@sample.data, colnames)) H_aligned = object@H.norm[sample.idx, ] @@ -5022,16 +4982,16 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = H_aligned <- object@H.norm tsne_coords <- object@tsne.coords } - + W <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) W <- pmin(W + V1, W + V2) - + dataset.specificity <- calcDatasetSpecificity(object, dataset1 = dataset1, dataset2 = dataset2, do.plot = do.spec.plot) factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - + markers <- getFactorMarkers(object, dataset1 = dataset1, dataset2 = dataset2, factor.share.thresh = factor.share.thresh, num.genes = num.genes, log.fc.thresh = log.fc.thresh, @@ -5039,7 +4999,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = dataset.specificity = dataset.specificity, verbose = verbose ) - + rownames(W) <- rownames(V1) <- rownames(V2) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) @@ -5054,11 +5014,11 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = factor_ds <- paste("Factor", i, "Dataset Specificity:", dataset.specificity[[3]][i]) p1 <- ggplot(tsne_df, aes_string(x = "Dim1", y = "Dim2", color = factorlab)) + geom_point() + scale_color_gradient(low = "yellow", high = "red") + ggtitle(label = factor_ds) - + top_genes_V1 <- markers[[1]]$gene[markers[[1]]$factor_num == i] top_genes_W <- markers[[2]]$gene[markers[[2]]$factor_num == i] top_genes_V2 <- markers[[3]]$gene[markers[[3]]$factor_num == i] - + top_genes_list <- list(top_genes_V1, top_genes_W, top_genes_V2) plot_list <- lapply(seq_along(top_genes_list), function(x) { top_genes <- top_genes_list[[x]] @@ -5077,7 +5037,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = labs(x = "", y = "") + ggtitle(label = names_list[[x]]) + coord_fixed() + ggplot2::theme_void() return(out_plot) }) - + p2 <- (plot_grid(plotlist = plot_list, align = "hv", nrow = 1) + draw_grob(roundrectGrob( x = 0.33, y = 0.5, width = 0.67, height = 0.70, @@ -5135,7 +5095,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' @param axis.labels Vector of two strings to use as x and y labels respectively (default NULL). #' @param do.title Include top title with cluster and Dataset Specificity (default FALSE). #' @param verbose Print progress bar/messages (TRUE by default) -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to @@ -5152,18 +5112,16 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex) -#' # pdf("gene_loadings.pdf") -#' plotGeneLoadings(ligerex, num.genes = 20) -#' # dev.off() -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' plotGeneLoadings(ligerex, num.genes = 20) +#' ligerex <- runTSNE(ligerex) +#' plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) #' } -#' plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes.show = 12, num.genes = 30, mark.top.genes = TRUE, factor.share.thresh = 10, log.fc.thresh = 1, umi.thresh = 30, frac.thresh = 0, @@ -5180,12 +5138,12 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes raster <- FALSE } } - + if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] } - + if(class(object@raw.data[[1]])[1] == "H5File"){ sample.idx = unlist(lapply(object@sample.data, colnames)) H_aligned = object@H.norm[sample.idx, ] @@ -5194,20 +5152,20 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes H_aligned <- object@H.norm tsne_coords <- object@tsne.coords } - + W_orig <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) W <- pmin(W_orig + V1, W_orig + V2) - + dataset.specificity <- calcDatasetSpecificity(object, dataset1 = dataset1, dataset2 = dataset2, do.plot = do.spec.plot ) - + factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - - + + markers <- getFactorMarkers(object, dataset1 = dataset1, dataset2 = dataset2, factor.share.thresh = factor.share.thresh, @@ -5216,7 +5174,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes dataset.specificity = dataset.specificity, verbose = verbose ) - + rownames(W) <- rownames(V1) <- rownames(V2) <- rownames(W_orig) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) @@ -5237,7 +5195,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } else { values <- NULL } - + if (isTRUE(x = raster)) { p1 <- ggplot(tsne_df, aes_string(x = "Dim1", y = "Dim2", color = factorlab)) + geom_scattermore(pointsize = pt.size) + @@ -5257,15 +5215,15 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes ) + theme_cowplot(12) } - - + + if (!is.null(axis.labels)) { p1 <- p1 + xlab(axis.labels[1]) + ylab(axis.labels[2]) } if (do.title) { p1 <- p1 + ggtitle(label = factor_ds) } - + # subset to specific factor and sort by p-value top_genes_V1 <- markers[[1]][markers[[1]]$factor_num == i, ] top_genes_V1 <- top_genes_V1[order(top_genes_V1$p_value), ]$gene @@ -5273,10 +5231,10 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes top_genes_W <- markers[[2]][markers[[2]]$factor_num == i, ]$gene top_genes_V2 <- markers[[3]][markers[[3]]$factor_num == i, ] top_genes_V2 <- top_genes_V2[order(top_genes_V2$p_value), ]$gene - + top_genes_list <- list(top_genes_V1, top_genes_W, top_genes_V2) # subset down to those which will be shown if sorting by p-val - + top_genes_list <- lapply(top_genes_list, function(x) { if (length(x) > num.genes.show) { # to avoid subset warning @@ -5284,7 +5242,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } x }) - + plot_list <- lapply(seq_along(top_genes_list), function(x) { top_genes <- top_genes_list[[x]] # make dataframe for cum gene loadings plot @@ -5296,7 +5254,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes if (length(top_genes) == 0) { top_genes <- c("no genes") } - + gene_df <- data.frame( loadings = sorted, xpos = seq(0, 1, length.out = length(sorted)), @@ -5304,7 +5262,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes ) y_lim_text <- max(gene_df$loadings) # plot and annotate with top genes - + out_plot <- ggplot(gene_df, aes_string(x = 'xpos', y = 'loadings')) + geom_point(size = pt.size) + theme_bw() + @@ -5327,7 +5285,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes clip = "off" ) + theme(plot.margin = unit(c(1, 4, 1, 1), "lines")) - + if (mark.top.genes) { out_plot <- out_plot + geom_point( data = subset(gene_df, gene_df[['top_k']] == TRUE), @@ -5337,9 +5295,9 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } return(out_plot) }) - + # p2 <- plot_grid(plotlist = plot_list, nrow = 1) - + return_plots[[i]] <- p1 / (plot_list[[1]] | plot_list[[2]] | plot_list[[3]]) # if can figure out how to make cowplot work, might bring this back # return_plots[[i]] <- plot_grid(p1, p2, nrow = 2, align = "h") @@ -5376,15 +5334,15 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' # plot expression for CD4 and return plots -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex) +#' plotGeneViolin(ligerex, "CD74", by.dataset = FALSE) +#' plotGeneViolin(ligerex, "CD74") plotGeneViolin <- function(object, gene, methylation.indices = NULL, by.dataset = TRUE, return.plots = FALSE) { if (class(object@raw.data[[1]])[1] == "H5File"){ @@ -5392,18 +5350,17 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, stop("norm.data should be sampled for making violin plots.") } } - + gene_vals <- c() - gene_df <- data.frame(object@tsne.coords) - rownames(gene_df) <- names(object@clusters) - + gene_df <- data.frame(Clusters = object@clusters) + for (i in 1:length(object@raw.data)) { if (class(object@raw.data[[i]])[1] == "H5File"){ if (i %in% methylation.indices) { gene_vals <- c(gene_vals, object@sample.data[[i]][gene, ]) } else { if (gene %in% rownames(object@sample.data[[i]])) { - gene_vals_int <- log2(10000 * object@sample.data[[i]][gene, ] + 1) + gene_vals_int <- log1p(10000 * object@sample.data[[i]][gene, ]) } else { gene_vals_int <- rep(list(0), ncol(object@sample.data[[i]])) @@ -5416,7 +5373,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, gene_vals <- c(gene_vals, object@norm.data[[i]][gene, ]) } else { if (gene %in% rownames(object@norm.data[[i]])) { - gene_vals_int <- log2(10000 * object@norm.data[[i]][gene, ] + 1) + gene_vals_int <- log1p(10000 * object@norm.data[[i]][gene, ]) } else { gene_vals_int <- rep(list(0), ncol(object@norm.data[[i]])) @@ -5426,24 +5383,23 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, } } } - + gene_df$Gene <- as.numeric(gene_vals[rownames(gene_df)]) - colnames(gene_df) <- c("Dim1", "Dim2", "gene") gene_plots <- list() for (i in 1:length(object@scale.data)) { if (by.dataset) { - gene_df.sub <- gene_df[rownames(object@H[[i]]), ] - gene_df.sub$Cluster <- object@clusters[rownames(object@H[[i]])] - title <- names(object@scale.data)[i] + gene_df.sub <- gene_df[colnames(object@norm.data[[i]]), ] + gene_df.sub$Cluster <- object@clusters[colnames(object@norm.data[[i]])] + title <- names(object@norm.data)[i] } else { gene_df.sub <- gene_df gene_df.sub$Cluster <- object@clusters title <- "All Datasets" } - max_v <- max(gene_df.sub["gene"], na.rm = TRUE) - min_v <- min(gene_df.sub["gene"], na.rm = TRUE) + max_v <- max(gene_df.sub["Gene"], na.rm = TRUE) + min_v <- min(gene_df.sub["Gene"], na.rm = TRUE) midpoint <- (max_v - min_v) / 2 - plot_i <- ggplot(gene_df.sub, aes_string(x = "Cluster", y = "gene", fill = "Cluster")) + + plot_i <- ggplot(gene_df.sub, aes_string(x = "Cluster", y = "Gene", fill = "Cluster")) + geom_boxplot(position = "dodge", width = 0.4, outlier.shape = NA, alpha = 0.7) + geom_violin(position = "dodge", alpha = 0.7) + ggtitle(title) @@ -5503,7 +5459,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' @param do.legend Display legend on plots (default TRUE). #' @param return.plots Return ggplot objects instead of printing directly (default FALSE). #' @param keep.scale Maintain min/max color scale across all plots when using plot.by (default FALSE) -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return If returning single plot, returns ggplot object; if returning multiple plots; returns @@ -5517,17 +5473,14 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' ligerex +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) -#' # plot expression for CD4 and return plots -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -#' } - +#' plotGene(ligerex, "CD74", pt.size = 1) plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by = 'dataset', log2scale = NULL, methylation.indices = NULL, plot.by = 'dataset', set.dr.lims = FALSE, pt.size = 0.1, min.clip = NULL, max.clip = NULL, @@ -5538,7 +5491,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by warning("Provided values for plot.by and scale.by do not match; results may not be very interpretable.") } - + # check raster and set by number of cells total if NULL if (is.null(x = raster)) { if (nrow(x = object@cell.data) > 1e5) { @@ -5549,8 +5502,8 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by raster <- FALSE } } - - + + if (use.raw) { if (is.null(log2scale)) { log2scale <- FALSE @@ -5622,7 +5575,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by max_exp_val <- max(gene_vals, na.rm = TRUE) min_exp_val <- min(gene_vals, na.rm = TRUE) } - + if (class(object@raw.data[[1]])[1] == "H5File") { cells <- unlist(lapply(object@sample.data, colnames)) dr_df <- data.frame(object@tsne.coords[cells,]) @@ -5635,7 +5588,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by # get dr limits for later lim1 <- c(min(dr_df$dr1), max(dr_df$dr1)) lim2 <- c(min(dr_df$dr2), max(dr_df$dr2)) - + if (plot.by != 'none') { if (!(plot.by %in% colnames(object@cell.data))) { stop("Please select existing feature in cell.data to plot.by, or add it before calling.") @@ -5683,7 +5636,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by } sub_df$gene[sub_df$gene < min_v & !is.na(sub_df$gene)] <- min_v sub_df$gene[sub_df$gene > max_v & !is.na(sub_df$gene)] <- max_v - + if (isTRUE(x = raster)) { ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'gene')) + geom_scattermore(pointsize = pt.size) + labs(col = gene) @@ -5691,7 +5644,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'gene')) + geom_point(size = pt.size) + labs(col = gene) } - + if (!is.null(cols.use)) { if (keep.scale) { ggp <- ggp + scale_color_gradientn(colors = cols.use, @@ -5716,14 +5669,14 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by if (set.dr.lims) { ggp <- ggp + xlim(lim1) + ylim(lim2) } - + if (plot.by != 'none') { base <- as.character(sub_df$plotby[1]) } else { base <- "" } ggp <- ggp + ggtitle(base) - + if (!is.null(axis.labels)) { ggp <- ggp + xlab(axis.labels[1]) + ylab(axis.labels[2]) } @@ -5746,7 +5699,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by if (plot.by == 'dataset') { p_list <- p_list[names(object@raw.data)] } - + if (return.plots){ if (length(p_list) == 1) { return(p_list[[1]]) @@ -5776,15 +5729,16 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input +#' \donttest{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) -#' # plot expression for CD4 and FCGR3A -#' # pdf("gene_plots.pdf") -#' plotGenes(ligerex, c("CD4", "FCGR3A")) -#' # dev.off() +#' plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) #' } - plotGenes <- function(object, genes, ...) { for (i in 1:length(genes)) { print(genes[i]) @@ -5819,17 +5773,16 @@ plotGenes <- function(object, genes, ...) { #' @param node.order Order of clusters in each set (list with three vectors of ordinal numbers). #' By default will try to automatically order them appropriately. #' -#' @return A riverplot object +#' @return NULL for now. Could be back if CRAN dependency riverplot is back. #' #' @importFrom plyr mapvalues -#' @importFrom riverplot makeRiver -#' @importFrom riverplot riverplot #' @importFrom grDevices hcl #' @importFrom utils capture.output #' #' @export #' @examples #' \dontrun{ +#' # Riverplot currently archived, cannot run this example #' # ligerex (liger object), factorization complete input #' # toy clusters #' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) @@ -5839,127 +5792,128 @@ plotGenes <- function(object, genes, ...) { #' # create riverplot #' makeRiverplot(ligerex, cluster1, cluster2) #' } - makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, min.frac = 0.05, min.cells = 10, river.yscale = 1, river.lty = 0, river.node_margin = 0.1, label.cex = 1, label.col = "black", lab.srt = 0, river.usr = NULL, node.order = "auto") { - cluster1 <- droplevels(cluster1) - cluster2 <- droplevels(cluster2) - if (is.null(cluster_consensus)) { - cluster_consensus <- droplevels(object@clusters) - } - # Make cluster names unique if necessary - if (length(intersect(levels(cluster1), levels(cluster2))) > 0 | - length(intersect(levels(cluster1), levels(cluster_consensus))) > 0 | - length(intersect(levels(cluster2), levels(cluster_consensus))) > 0) { - message("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") - cluster1 <- mapvalues(cluster1, from = levels(cluster1), - to = paste("1", levels(cluster1), sep = "-")) - cluster2 <- mapvalues(cluster2, from = levels(cluster2), - to = paste("2", levels(cluster2), sep = "-")) - } - cluster1 <- cluster1[intersect(names(cluster1), names(cluster_consensus))] - cluster2 <- cluster2[intersect(names(cluster2), names(cluster_consensus))] - - # set node order - if (identical(node.order, "auto")) { - tab.1 <- table(cluster1, cluster_consensus[names(cluster1)]) - tab.1 <- sweep(tab.1, 1, rowSums(tab.1), "/") - tab.2 <- table(cluster2, cluster_consensus[names(cluster2)]) - tab.2 <- sweep(tab.2, 1, rowSums(tab.2), "/") - whichmax.1 <- apply(tab.1, 1, which.max) - whichmax.2 <- apply(tab.2, 1, which.max) - ord.1 <- order(whichmax.1) - ord.2 <- order(whichmax.2) - cluster1 <- factor(cluster1, levels = levels(cluster1)[ord.1]) - cluster2 <- factor(cluster2, levels = levels(cluster2)[ord.2]) - } else { - if (is.list(node.order)) { - cluster1 <- factor(cluster1, levels = levels(cluster1)[node.order[[1]]]) - cluster_consensus <- factor(cluster_consensus, - levels = levels(cluster_consensus)[node.order[[2]]]) - cluster2 <- factor(cluster2, levels = levels(cluster2)[node.order[[3]]]) - } - } - cluster1 <- cluster1[!is.na(cluster1)] - cluster2 <- cluster2[!is.na(cluster2)] - nodes1 <- levels(cluster1)[table(cluster1) > 0] - nodes2 <- levels(cluster2)[table(cluster2) > 0] - nodes_middle <- levels(cluster_consensus)[table(cluster_consensus) > 0] - node_Xs <- c( - rep(1, length(nodes1)), rep(2, length(nodes_middle)), - rep(3, length(nodes2)) - ) - - # first set of edges - edge_list <- list() - for (i in 1:length(nodes1)) { - temp <- list() - i_cells <- names(cluster1)[cluster1 == nodes1[i]] - for (j in 1:length(nodes_middle)) { - if (length(which(cluster_consensus[i_cells] == nodes_middle[j])) / length(i_cells) > min.frac & - length(which(cluster_consensus[i_cells] == nodes_middle[j])) > min.cells) { - temp[[nodes_middle[j]]] <- sum(cluster_consensus[i_cells] == - nodes_middle[j]) / length(cluster1) - } - } - edge_list[[nodes1[i]]] <- temp - } - # second set of edges - cluster3 <- cluster_consensus[names(cluster2)] - for (i in 1:length(nodes_middle)) { - temp <- list() - i_cells <- names(cluster3)[cluster3 == nodes_middle[i]] - for (j in 1:length(nodes2)) { - j_cells <- names(cluster2)[cluster2 == nodes2[j]] - if (length(which(cluster_consensus[j_cells] == nodes_middle[i])) / length(j_cells) > min.frac & - length(which(cluster_consensus[j_cells] == nodes_middle[i])) > min.cells) { - if (!is.na(sum(cluster2[i_cells] == nodes2[j]))) { - temp[[nodes2[j]]] <- sum(cluster2[i_cells] == - nodes2[j]) / length(cluster2) - } - } - } - edge_list[[nodes_middle[i]]] <- temp - } - # set cluster colors - node_cols <- list() - ggplotColors <- function(g) { - d <- 360 / g - h <- cumsum(c(15, rep(d, g - 1))) - grDevices::hcl(h = h, c = 100, l = 65) - } - pal <- ggplotColors(length(nodes1)) - for (i in 1:length(nodes1)) { - node_cols[[nodes1[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - pal <- ggplotColors(length(nodes_middle)) - for (i in 1:length(nodes_middle)) { - node_cols[[nodes_middle[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - pal <- ggplotColors(length(nodes2)) - for (i in 1:length(nodes2)) { - node_cols[[nodes2[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - # create nodes and riverplot object - nodes <- list(nodes1, nodes_middle, nodes2) - node.limit <- max(unlist(lapply(nodes, length))) - - node_Ys <- lapply(1:length(nodes), function(i) { - seq(1, node.limit, by = node.limit / length(nodes[[i]])) - }) - rp <- makeRiver(c(nodes1, nodes_middle, nodes2), edge_list, - node_xpos = node_Xs, node_ypos = unlist(node_Ys), node_styles = node_cols - ) + .Deprecated(NULL, msg = "Cran package riverplot is archived, we have to disable this function for now.") + return(NULL) + # cluster1 <- droplevels(cluster1) + # cluster2 <- droplevels(cluster2) + # if (is.null(cluster_consensus)) { + # cluster_consensus <- droplevels(object@clusters) + # } + # # Make cluster names unique if necessary + # if (length(intersect(levels(cluster1), levels(cluster2))) > 0 | + # length(intersect(levels(cluster1), levels(cluster_consensus))) > 0 | + # length(intersect(levels(cluster2), levels(cluster_consensus))) > 0) { + # message("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") + # cluster1 <- mapvalues(cluster1, from = levels(cluster1), + # to = paste("1", levels(cluster1), sep = "-")) + # cluster2 <- mapvalues(cluster2, from = levels(cluster2), + # to = paste("2", levels(cluster2), sep = "-")) + # } + # cluster1 <- cluster1[intersect(names(cluster1), names(cluster_consensus))] + # cluster2 <- cluster2[intersect(names(cluster2), names(cluster_consensus))] + # + # # set node order + # if (identical(node.order, "auto")) { + # tab.1 <- table(cluster1, cluster_consensus[names(cluster1)]) + # tab.1 <- sweep(tab.1, 1, rowSums(tab.1), "/") + # tab.2 <- table(cluster2, cluster_consensus[names(cluster2)]) + # tab.2 <- sweep(tab.2, 1, rowSums(tab.2), "/") + # whichmax.1 <- apply(tab.1, 1, which.max) + # whichmax.2 <- apply(tab.2, 1, which.max) + # ord.1 <- order(whichmax.1) + # ord.2 <- order(whichmax.2) + # cluster1 <- factor(cluster1, levels = levels(cluster1)[ord.1]) + # cluster2 <- factor(cluster2, levels = levels(cluster2)[ord.2]) + # } else { + # if (is.list(node.order)) { + # cluster1 <- factor(cluster1, levels = levels(cluster1)[node.order[[1]]]) + # cluster_consensus <- factor(cluster_consensus, + # levels = levels(cluster_consensus)[node.order[[2]]]) + # cluster2 <- factor(cluster2, levels = levels(cluster2)[node.order[[3]]]) + # } + # } + # cluster1 <- cluster1[!is.na(cluster1)] + # cluster2 <- cluster2[!is.na(cluster2)] + # nodes1 <- levels(cluster1)[table(cluster1) > 0] + # nodes2 <- levels(cluster2)[table(cluster2) > 0] + # nodes_middle <- levels(cluster_consensus)[table(cluster_consensus) > 0] + # node_Xs <- c( + # rep(1, length(nodes1)), rep(2, length(nodes_middle)), + # rep(3, length(nodes2)) + # ) + # + # # first set of edges + # edge_list <- list() + # for (i in 1:length(nodes1)) { + # temp <- list() + # i_cells <- names(cluster1)[cluster1 == nodes1[i]] + # for (j in 1:length(nodes_middle)) { + # if (length(which(cluster_consensus[i_cells] == nodes_middle[j])) / length(i_cells) > min.frac & + # length(which(cluster_consensus[i_cells] == nodes_middle[j])) > min.cells) { + # temp[[nodes_middle[j]]] <- sum(cluster_consensus[i_cells] == + # nodes_middle[j]) / length(cluster1) + # } + # } + # edge_list[[nodes1[i]]] <- temp + # } + # # second set of edges + # cluster3 <- cluster_consensus[names(cluster2)] + # for (i in 1:length(nodes_middle)) { + # temp <- list() + # i_cells <- names(cluster3)[cluster3 == nodes_middle[i]] + # for (j in 1:length(nodes2)) { + # j_cells <- names(cluster2)[cluster2 == nodes2[j]] + # if (length(which(cluster_consensus[j_cells] == nodes_middle[i])) / length(j_cells) > min.frac & + # length(which(cluster_consensus[j_cells] == nodes_middle[i])) > min.cells) { + # if (!is.na(sum(cluster2[i_cells] == nodes2[j]))) { + # temp[[nodes2[j]]] <- sum(cluster2[i_cells] == + # nodes2[j]) / length(cluster2) + # } + # } + # } + # edge_list[[nodes_middle[i]]] <- temp + # } + # # set cluster colors + # node_cols <- list() + # ggplotColors <- function(g) { + # d <- 360 / g + # h <- cumsum(c(15, rep(d, g - 1))) + # grDevices::hcl(h = h, c = 100, l = 65) + # } + # pal <- ggplotColors(length(nodes1)) + # for (i in 1:length(nodes1)) { + # node_cols[[nodes1[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # pal <- ggplotColors(length(nodes_middle)) + # for (i in 1:length(nodes_middle)) { + # node_cols[[nodes_middle[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # pal <- ggplotColors(length(nodes2)) + # for (i in 1:length(nodes2)) { + # node_cols[[nodes2[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # # create nodes and riverplot object + # nodes <- list(nodes1, nodes_middle, nodes2) + # node.limit <- max(unlist(lapply(nodes, length))) + # + # node_Ys <- lapply(1:length(nodes), function(i) { + # seq(1, node.limit, by = node.limit / length(nodes[[i]])) + # }) + # rp <- makeRiver(c(nodes1, nodes_middle, nodes2), edge_list, + # node_xpos = node_Xs, node_ypos = unlist(node_Ys), node_styles = node_cols + # ) # prevent normal riverplot output being printed to console - invisible(capture.output(riverplot(rp, - yscale = river.yscale, lty = river.lty, - node_margin = river.node_margin, usr = river.usr - ))) + # invisible(capture.output(riverplot(rp, + # yscale = river.yscale, lty = river.lty, + # node_margin = river.node_margin, usr = river.usr + # ))) } #' Plot cluster proportions by dataset @@ -5978,15 +5932,16 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) #' ligerex <- quantile_norm(ligerex) -#' # plot cluster proportions +#' ligerex <- louvainCluster(ligerex) #' plotClusterProportions(ligerex) -#' } - plotClusterProportions <- function(object, return.plot = FALSE) { - + sample_names <- unlist(lapply(seq_along(object@H), function(i) { rep(names(object@H)[i], nrow(object@H[[i]])) })) @@ -6046,12 +6001,14 @@ plotClusterProportions <- function(object, return.plot = FALSE) { #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input -#' # plot expression for CD4 and return plots -#' loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex) +#' plotClusterFactors(ligerex) plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "Rowv", col = NULL, return.data = FALSE, ...) { if (use.aligned) { @@ -6069,7 +6026,7 @@ plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "R for (cluster in levels(object@clusters)) { cluster.bars[[cluster]] <- colSums(row.scaled[names(object@clusters) [which(object@clusters == cluster)], ]) - + } cluster.bars <- Reduce(rbind, cluster.bars) if (is.null(col)) { @@ -6115,18 +6072,18 @@ plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "R #' @return List of shared and specific factors. First three elements are dataframes of dataset1- #' specific, shared, and dataset2-specific markers. Last two elements are tables indicating the #' number of factors in which marker appears. -#' +#' #' @importFrom stats wilcox.test #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input -#' markers <- getFactorMarkers(ligerex, num.genes = 10) -#' # look at shared markers -#' head(markers[[2]]) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' fm <- getFactorMarkers(ligerex, dataset1 = "stim", dataset2 = "ctrl") getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.share.thresh = 10, dataset.specificity = NULL, log.fc.thresh = 1, pval.thresh = 0.05, num.genes = 30, print.genes = FALSE, verbose = TRUE) { @@ -6142,14 +6099,14 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh dataset2 = dataset2, do.plot = FALSE) } factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - + if (length(factors.use) < 2 && verbose) { message( "Warning: only ", length(factors.use), " factors passed the dataset specificity threshold." ) } - + Hs_scaled <- lapply(object@H, function(x) { scale(x, scale = TRUE, center = TRUE) }) @@ -6165,13 +6122,13 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh } } names(labels) <- names(object@H) - + V1_matrices <- list() V2_matrices <- list() W_matrices <- list() for (j in 1:length(factors.use)) { i <- factors.use[j] - + W <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) @@ -6189,7 +6146,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh expr_mat = Reduce(cbind, object@sample.data[c(dataset1,dataset2)])[object@var.genes, c(labels[[dataset1]] == i, labels[[dataset2]] == i)] cell_label = rep(c(dataset1, dataset2), c(sum(labels[[dataset1]] == i), sum(labels[[dataset2]] == i))) wilcoxon_result = wilcoxauc(log(expr_mat + 1e-10), cell_label) - + } else { expr_mat = cbind(object@norm.data[[dataset1]][object@var.genes, labels[[dataset1]] == i], object@norm.data[[dataset2]][object@var.genes, labels[[dataset2]] == i]) @@ -6200,11 +6157,11 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh names(log2fc) = wilcoxon_result[wilcoxon_result$group == dataset1, ]$feature filtered_genes_V1 = wilcoxon_result[wilcoxon_result$logFC > log.fc.thresh & wilcoxon_result$pval < pval.thresh, ]$feature filtered_genes_V2 = wilcoxon_result[-wilcoxon_result$logFC > log.fc.thresh & wilcoxon_result$pval < pval.thresh, ]$feature - + W <- pmin(W + V1, W + V2) V1 <- V1[filtered_genes_V1, , drop = FALSE] V2 <- V2[filtered_genes_V2, , drop = FALSE] - + if (length(filtered_genes_V1) == 0) { top_genes_V1 <- character(0) } else { @@ -6222,7 +6179,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh top_genes_W <- row.names(W)[order(W[, i], decreasing = TRUE)[1:num.genes] ] top_genes_W <- top_genes_W[!is.na(top_genes_W)] top_genes_W <- top_genes_W[which(W[top_genes_W, i] > 0)] - + if (print.genes && verbose) { message("Factor ", i) message('Dataset 1') @@ -6232,7 +6189,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh message('Dataset 2') message(top_genes_V2) } - + pvals <- list() # order is V1, V2, W top_genes <- list(top_genes_V1, top_genes_V2, top_genes_W) for (k in 1:length(top_genes)) { @@ -6292,109 +6249,78 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' @param nms By default, labels cell names with dataset of origin (this is to account for cells in #' different datasets which may have same name). Other names can be passed here as vector, must #' have same length as the number of datasets. (default names(H)) -#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default TRUE). +#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default FALSE). #' @param use.liger.genes Whether to carry over variable genes (default TRUE). #' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE). -#' +#' @param assay Assay name to set in the Seurat object (default "RNA"). #' @return Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. -#' -#' @import Matrix -#' @importFrom methods new -#' @importFrom utils packageVersion -#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets ONLY), factorization complete input -#' s.object <- ligerToSeurat(ligerex) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { +#' # 1.6.2 is not compatible thus don't test +#' # but can use `setOldClass("mMatrix")` as a hack +#' srt <- ligerToSeurat(ligerex) #' } - -ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use.liger.genes = TRUE, - by.dataset = FALSE) { +ligerToSeurat <- function(object, nms = NULL, renormalize = FALSE, use.liger.genes = TRUE, + by.dataset = FALSE, assay = "RNA") { if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" needed for this function to work. Please install it.", - call. = FALSE - ) + call. = FALSE) } - # get Seurat version - maj_version <- packageVersion('Seurat')$major - if (class(object@raw.data[[1]])[1] != 'dgCMatrix') { - # mat <- as(x, 'CsparseMatrix') - object@raw.data <- lapply(object@raw.data, function(x) { - as(x, 'CsparseMatrix') - }) + if (!inherits(object@raw.data[[1]], 'dgCMatrix')) { + object@raw.data <- lapply(object@raw.data, as, Class = "CsparseMatrix") } raw.data <- MergeSparseDataAll(object@raw.data, nms) - scale.data <- do.call(rbind, object@scale.data) - rownames(scale.data) <- colnames(raw.data) - if (maj_version < 3) { - var.genes <- object@var.genes - inmf.obj <- new( - Class = "dim.reduction", gene.loadings = t(object@W), - cell.embeddings = object@H.norm, key = "iNMF_" - ) - rownames(inmf.obj@gene.loadings) <- var.genes - tsne.obj <- new( - Class = "dim.reduction", cell.embeddings = object@tsne.coords, - key = "tSNE_" - ) + new.seurat <- Seurat::CreateSeuratObject(raw.data, assay = assay) + if (isTRUE(renormalize)) { + new.seurat <- Seurat::NormalizeData(new.seurat) } else { - var.genes <- object@var.genes - if (any(grepl('_', var.genes))) { - message("Warning: Seurat v3 genes cannot have underscores, replacing with dashes ('-')") - var.genes <- gsub("_", replacement = "-", var.genes) + if (length(object@norm.data) > 0) { + norm.data <- MergeSparseDataAll(object@norm.data, nms) + new.seurat <- SeuratObject::SetAssayData(new.seurat, layer = "data", slot = "data", new.data = norm.data) } + } + if (length(object@var.genes) > 0 && use.liger.genes) { + Seurat::VariableFeatures(new.seurat) <- object@var.genes + } + if (length(object@scale.data) > 0) { + scale.data <- t(Reduce(rbind, object@scale.data)) + colnames(scale.data) <- colnames(raw.data) + new.seurat <- SeuratObject::SetAssayData(object = new.seurat, layer = "scale.data", slot = "scale.data", new.data = scale.data) + } + if (all(dim(object@W) > 0) && all(dim(object@H.norm) > 0)) { inmf.loadings <- t(x = object@W) + dimnames(inmf.loadings) <- list(object@var.genes, + paste0("iNMF_", seq_len(ncol(inmf.loadings)))) inmf.embeddings <- object@H.norm - tsne.embeddings <- object@tsne.coords - rownames(x = inmf.loadings) <- var.genes - rownames(x = inmf.embeddings) <- - rownames(x = tsne.embeddings) <- - rownames(x = scale.data) + dimnames(inmf.embeddings) <- list(unlist(lapply(object@scale.data, rownames), use.names = FALSE), + paste0("iNMF_", seq_len(ncol(inmf.loadings)))) inmf.obj <- Seurat::CreateDimReducObject( embeddings = inmf.embeddings, - loadings = inmf.loadings, - key = "iNMF_", - global = TRUE + loadings = inmf.embeddings, + assay = assay, + key = "iNMF_" ) + new.seurat[["iNMF"]] <- inmf.obj + } + if (all(dim(object@tsne.coords) > 0)) { + tsne.embeddings <- object@tsne.coords + dimnames(tsne.embeddings) <- list(rownames(object@H.norm), + c("TSNE_1", "TSNE_2")) tsne.obj <- Seurat::CreateDimReducObject( embeddings = tsne.embeddings, - key = "tSNE_", - global = TRUE + assay = assay, + key = "TSNE_" ) + new.seurat[["TSNE"]] <- tsne.obj } - new.seurat <- Seurat::CreateSeuratObject(raw.data) - if (renormalize) { - new.seurat <- Seurat::NormalizeData(new.seurat) - } - if (by.dataset) { - ident.use <- as.character(unlist(lapply(1:length(object@raw.data), function(i) { - dataset.name <- names(object@raw.data)[i] - paste0(dataset.name, as.character(object@clusters[colnames(object@raw.data[[i]])])) - }))) - } else { - ident.use <- as.character(object@clusters) - } - - if (maj_version < 3) { - if (use.liger.genes) { - new.seurat@var.genes <- var.genes - } - new.seurat@scale.data <- t(scale.data) - new.seurat@dr$tsne <- tsne.obj - new.seurat@dr$inmf <- inmf.obj - new.seurat <- Seurat::SetIdent(new.seurat, ident.use = ident.use) - - } else { - if (use.liger.genes) { - Seurat::VariableFeatures(new.seurat) <- var.genes - } - Seurat::SetAssayData(new.seurat, slot = "scale.data", t(scale.data), assay = "RNA") - new.seurat[['tsne']] <- tsne.obj - new.seurat[['inmf']] <- inmf.obj - Seurat::Idents(new.seurat) <- ident.use - } + new.seurat$orig.ident <- object@cell.data$dataset + idents <- object@clusters + if (length(idents) == 0 || isTRUE(by.dataset)) idents <- object@cell.data$dataset + Seurat::Idents(new.seurat) <- idents + return(new.seurat) } @@ -6437,27 +6363,15 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use #' @param cca.to.H Carry over CCA (and aligned) loadings and insert them into H (and H.norm) slot in #' liger object (only meaningful for combined analysis Seurat object). Useful for plotting directly #' afterwards. (default FALSE) -#' #' @return \code{liger} object. -#' -#' @import Matrix -#' #' @export #' @examples -#' \dontrun{ -#' # Seurat objects for two pbmc datasets -#' tenx <- readRDS('tenx.RDS') -#' seqwell <- readRDS('seqwell.RDS') -#' # create liger object, using project names -#' ligerex <- seuratToLiger(list(tenx, seqwell)) -#' # create liger object, passing in names explicitly, using hvg.info genes -#' ligerex2 <- seuratToLiger(list(tenx, seqwell), names = c('tenx', 'seqwell'), num.hvg.info = 2000) -#' # Seurat object for joint analysis -#' pbmc <- readRDS('pbmc.RDS') -#' # create liger object, using 'protocol' for dataset names -#' ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { +#' ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") +#' stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") +#' ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), +#' use.seurat.genes = FALSE) #' } - seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-projects", meta.var = NULL, assays.use = NULL, raw.assay = "RNA", remove.missing = TRUE, renormalize = TRUE, use.seurat.genes = TRUE, num.hvg.info = NULL, use.idents = TRUE, use.tsne = TRUE, @@ -6467,7 +6381,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project call. = FALSE ) } - + # Remind to set combined.seurat if ((typeof(objects) != "list") & (!combined.seurat)) { stop("Please pass a list of objects or set combined.seurat = TRUE") @@ -6485,7 +6399,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project version <- version[1] } } - + # Only a single seurat object expected if combined.seurat if (combined.seurat) { if ((is.null(meta.var)) & (is.null(assays.use))) { @@ -6516,7 +6430,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project }) names(raw.data) <- assays.use } - + if (version > 2) { var.genes <- Seurat::VariableFeatures(objects) idents <- Seurat::Idents(objects) @@ -6566,7 +6480,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project }) # tsne coords not very meaningful for separate objects tsne.coords <- NULL - + if (version > 2) { var.genes <- Reduce(union, lapply(objects, function(x) { Seurat::VariableFeatures(x) @@ -6610,7 +6524,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project var.genes <- var.genes[rowSums(new.liger@raw.data[[i]][var.genes, ]) > 0] var.genes <- var.genes[!is.na(var.genes)] } - + new.liger@var.genes <- var.genes } if (use.idents) { @@ -6665,13 +6579,8 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), with clusters 0:10 -#' # factorization, alignment, and t-SNE calculation have been performed -#' # subset by clusters -#' ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' lig.small <- subsetLiger(ligerex, cells.use = c(colnames(ctrl)[1:100], colnames(stim)[1:100])) subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.missing = TRUE) { if (!is.null(clusters.use)) { cells.use <- names(object@clusters)[which(object@clusters %in% clusters.use)] @@ -6711,7 +6620,7 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi if (ncol(a@cell.data) < ncol(object@cell.data)) { a@cell.data <- droplevels(data.frame(object@cell.data[cell.names, ])) } - + a@W <- object@W a@V <- object@V a@var.genes <- object@var.genes @@ -6732,35 +6641,32 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi #' @param ... Additional parameters passed on to createLiger. #' #' @return \code{liger} object with rearranged raw.data slot. -#' +#' #' @import Matrix #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory objects) organized by species -#' # with column designating sex in cell.data -#' # rearrange by sex -#' ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' # Create a random variable of two categories +#' ligerex@cell.data$foo <- factor(sample(c(1,2), 600, replace = TRUE)) +#' ligerexFoo <- reorganizeLiger(ligerex, "foo") reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "orig.dataset", ...) { if (!(by.feature %in% colnames(object@cell.data))) { stop("Please select existing feature in cell.data to reorganize by, or add it before calling.") } - if(class(object@cell.data[, by.feature]) != "factor"){ + if(!is.factor(object@cell.data[, by.feature])){ stop("Error: cell.data feature must be of class 'factor' to reorganize object. Please change column to factor and re-run reorganizeLiger") } - if (!is.null(object@clusters)) { + if (length(object@clusters) > 0) { object@cell.data[['orig.clusters']] <- object@clusters } orig.data <- object@cell.data colnames(orig.data)[colnames(orig.data) == "dataset"] <- new.label - + # make this less memory intensive for large datasets all.data <- MergeSparseDataAll(object@raw.data) - + new.raw <- lapply(levels(orig.data[[by.feature]]), function(x) { cells.keep <- rownames(orig.data)[which(orig.data[[by.feature]] == x)] all.data[, cells.keep] @@ -6769,7 +6675,7 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o rm(all.data) gc() new.object <- createLiger(raw.data = new.raw, ...) - + if (keep.meta) { cols.to.add <- setdiff(colnames(orig.data), colnames(new.object@cell.data)) cols.to.add <- cols.to.add[which(cols.to.add != by.feature)] @@ -6792,17 +6698,15 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return Updated \code{liger} object. -#' +#' #' @importFrom methods .hasSlot slot slotNames #' #' @export #' @examples #' \dontrun{ -#' # analogy (old Analogizer object) -#' # convert to latest class definition +#' # Not able to generate old object from current version, thus not run #' ligerex <- convertOldLiger(analogy) #' } - convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { new.liger <- createLiger(object@raw.data) slots_new <- slotNames(new.liger) @@ -6810,7 +6714,7 @@ convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { slots_exist <- sapply(slots_new, function(x) { .hasSlot(object, x) }) - + slots <- slots_new[slots_exist] for (slotname in slots) { if (!(slotname %in% c('raw.data')) | (override.raw)) { @@ -6839,17 +6743,17 @@ convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { #' factorizations of the same dataset can be run with one rep if necessary. (default 1) #' @param rand.seed Random seed to allow reproducible results (default 1). #' @param print.obj Print objective function values after convergence (default FALSE). -#' @param vectorized.lamba Whether or not to expect a vectorized lambda parameter -#' ########################################################################## +#' @param vectorized.lamba Whether or not to expect a vectorized lambda parameter +#' @noRd optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e-10,rand.seed=1, print.obj = FALSE, vectorized.lambda = FALSE){ - + set.seed(seed =rand.seed) #Account for vectorized lambda - print('Performing Factorization using UINMF and unshared features') + print('Performing Factorization using UINMF and unshared features') if (vectorized.lambda == FALSE){ lambda = rep(lambda, length(names(object@raw.data))) } - + # Get a list of all the matrices mlist = list() xdim = list() @@ -6857,7 +6761,7 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- mlist[[i]] = t(object@scale.data[[i]]) xdim[[i]] = dim(mlist[[i]]) } - + #return what datasets have unshared features, and the dimensions of those unshared features u_dim <- c() max_feats = 0 @@ -6877,14 +6781,14 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- ############## For every set of additional features less than the maximum, append an additional zero matrix s.t. it matches the maximum for (i in 1:length(object@scale.data)){ if (i %in% unshared){ - mlist[[i]] <- rbind(mlist[[i]],object@scale.unshared.data[[i]]) + mlist[[i]] <- rbind(mlist[[i]],object@scale.unshared.data[[i]]) } #For the U matrix with the maximum amount of features, append the whole thing else { mlist[[i]] <- rbind(mlist[[i]]) } } - + X <- mlist ################# Create an 0 matrix the size of U for all U's, s.t. it can be stacked to W zero_matrix_u_full <- c() @@ -6895,14 +6799,14 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- zero_matrix_u_partial[[i]] <- matrix(0, nrow = u_dim[[i]][1], ncol = k) } } - + num_cells = c() for (i in 1:length(X)){ num_cells = c(num_cells, ncol(X[[i]])) } - + num_genes = length(object@var.genes) - + best_obj <- Inf for (i in 1:nrep){ print("Processing") @@ -6913,38 +6817,38 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- idX[[i]] = sample(1:num_cells[i], k) } V = list() - + #Establish V from only the RNA dimensions - + for (i in 1:length(X)){ V[[i]] = t(object@scale.data[[i]])[,idX[[i]]] } #Establish W from the shared gene dimensions - - W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) - + + W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) + H = list() - - #Initialize U + + #Initialize U U = list() for (i in 1:length(X)){ if (i %in% unshared){ U[[i]] = t(ulist[[i]])[,idX[[i]]] } } - + iter = 0 - total_time = 0 + total_time = 0 pb <- txtProgressBar(min = 0, max = max.iters, style = 3) sqrt_lambda = list() for (i in 1:length(X)){ sqrt_lambda[[i]]= sqrt(lambda[[i]]) } - ############################ Initial Training Objects - + ############################ Initial Training Objects + obj_train_approximation = 0 obj_train_penalty = 0 - + for (i in 1:length(X)){ H[[i]] = matrix(abs(runif(k * num_cells[i], 0, 2)), k, num_cells[i]) if (i %in% unshared){ @@ -6954,21 +6858,21 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- else { obj_train_approximation = obj_train_approximation + norm(X[[i]] - (W+ V[[i]]) %*% H[[i]],"F")^2 obj_train_penalty = obj_train_penalty + lambda[[i]]*norm(V[[i]]%*% H[[i]], "F")^2 - + } } obj_train = obj_train_approximation + obj_train_penalty - - ######################### Initialize Object Complete ########################### + + ######################### Initialize Object Complete ########################### ########################## Begin Updates######################################## delta = Inf objective_value_list = list() - - iter = 1 + + iter = 1 while(delta > thresh & iter <= max.iters){ iter_start_time = Sys.time() - - + + #H- Updates for (i in 1:length(X)){ if (!(i %in% unshared)){ @@ -6978,20 +6882,20 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- H[[i]] = solveNNLS(rbind(rbind(W,zero_matrix_u_partial[[i]]) + rbind((V[[i]]),U[[i]]), sqrt_lambda[[i]] * rbind(V[[i]],U[[i]])), rbind((X[[i]]), matrix(0, num_genes+ u_dim[[i]][1], xdim[[i]][2]))) } } - + #V - updates for (i in 1:length(X)){ V[[i]] = t(solveNNLS(rbind(t(H[[i]]), sqrt_lambda[[i]] * t(H[[i]])), rbind(t(X[[i]][0:num_genes,] - W %*% H[[i]]), matrix(0, num_cells[i], num_genes)))) } ################################################# Updating U################################## - + for (i in 1:length(X)){ if (i %in% unshared){ U[[i]] = t(solveNNLS(rbind(t(H[[i]]),sqrt_lambda[[i]]* t(H[[i]])), rbind(t(X[[i]][(num_genes+1):(u_dim[[i]][1]+num_genes), ]),t(zero_matrix_u_full[[i]])))) } } - - + + ############################################################################################## ################################################# Updating W ################################# H_t_stack = c() @@ -7003,19 +6907,19 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- diff_stack_w = rbind(diff_stack_w,t(X[[i]][0:num_genes,] - V[[i]] %*% H[[i]])) } W = t(solveNNLS(H_t_stack, diff_stack_w)) - - ############################################################################################ + + ############################################################################################ iter_end_time = Sys.time() iter_time = as.numeric(difftime(iter_end_time, iter_start_time, units = "secs")) total_time = total_time + iter_time - + #Updating training object obj_train_prev = obj_train obj_train_approximation = 0 obj_train_penalty = 0 - - - + + + for (i in 1:length(X)){ if (i %in% unshared){ obj_train_approximation = obj_train_approximation + norm(X[[i]] - (rbind(W,zero_matrix_u_partial[[i]]) + rbind(V[[i]],U[[i]])) %*% H[[i]],"F")^2 @@ -7024,10 +6928,10 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- else { obj_train_approximation = obj_train_approximation + norm(X[[i]] - (W+ V[[i]]) %*% H[[i]],"F")^2 obj_train_penalty = obj_train_penalty + lambda[[i]]*norm(V[[i]]%*% H[[i]], "F")^2 - + } } - + obj_train = obj_train_approximation + obj_train_penalty delta = abs(obj_train_prev-obj_train)/mean(c(obj_train_prev,obj_train)) iter = iter + 1 @@ -7041,13 +6945,13 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- V_m <- V U_m <- U best_obj <- obj_train - best_seed <- current + best_seed <- current } } - + rownames(W_m) = rownames(X[[1]][0:xdim[[i]][1],]) colnames(W_m) = NULL - + for (i in 1:length(X)){ if (i %in% unshared){ rownames(U_m[[i]]) = rownames(X[[i]][(num_genes+1):(u_dim[[i]][1]+num_genes), ]) @@ -7056,8 +6960,8 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- rownames(V_m[[i]]) = rownames(X[[i]][0:xdim[[i]][1],]) colnames(V_m[[i]]) = NULL colnames(H_m[[i]]) = colnames(X[[i]]) - } - + } + ################################## Returns Results Section ######################################################### object@W <- t(W_m) for (i in 1:length(X)){ @@ -7076,13 +6980,13 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- if (print.obj) { cat("\n", "Objective:", best_obj, "\n") } - + rel_cells = list() for (i in 1:length(X)){ rel_cells <- c(rel_cells, rownames(object@scale.data[[i]])) } rel_cells <- unlist(rel_cells) - + object@cell.data <- object@cell.data[rel_cells,] cat("\n", "Best results with seed ", best_seed, ".\n", sep = "") return (object) @@ -7092,13 +6996,22 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- #' Calculate loadings for each factor #' -#' Calculates the contribution of each factor of W,V, and U to the reconstruction. +#' Calculates the contribution of each factor of W,V, and U to the reconstruction. #' #' @param object \code{liger} object. Should call quantileNorm before calling. #' @return A dataframe, such that each column represents the contribution of a specific matrix (W, V_1, V_2, etc. ) #' @export +#' @examples +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' calcNormLoadings(ligerex) calcNormLoadings = function(object) { - H_norm = object@H.norm + H_norm = object@H.norm W_norm = object@W V_norm = object@V U_norm = object@U @@ -7119,7 +7032,7 @@ calcNormLoadings = function(object) { hw = hi %*% wi forb_hw = norm(hw, type = "F")/dim(W_norm)[[2]] w_loadings = append(w_loadings, forb_hw) - + ###### Calculate V for (j in 1:length(object@raw.data)){ temp_v = t(as.matrix(V_norm[[j]][i,])) @@ -7138,12 +7051,12 @@ calcNormLoadings = function(object) { } } } - + ################# Format the return object w_loadings = unlist(w_loadings) factors = 1:dim(object@H.norm)[[2]] results = data.frame(factors, w_loadings) - + # For all V for (j in 1:length(object@raw.data)){ results = cbind(results, unlist(v_loadings[[j]])) @@ -7159,6 +7072,6 @@ calcNormLoadings = function(object) { } } } - + return(results) -} \ No newline at end of file +} diff --git a/R/utilities.R b/R/utilities.R index ac01d101..abfc2ce1 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -2,6 +2,8 @@ #' @importFrom grDevices heat.colors #' @importFrom methods as is #' @importFrom rlang .data +#' @importFrom FNN get.knnx +#' @importFrom stats p.adjust NULL # Utility functions for iiger methods. Some published, some not. @@ -11,7 +13,7 @@ fplot = function(tsne,NMFfactor,title,cols.use=heat.colors(10),pt.size=0.7,pch.u data.cut=as.numeric(as.factor(cut(as.numeric(NMFfactor),breaks=length(cols.use)))) data.col=rev(cols.use)[data.cut] plot(tsne[,1],tsne[,2],col=data.col,cex=pt.size,pch=pch.use,main=title) - + } # Binds list of matrices row-wise (vertical stack) @@ -22,7 +24,6 @@ rbindlist = function(mat_list) # helper function for calculating KL divergence from uniform distribution # (related to Shannon entropy) for factorization -#' @importFrom methods is kl_divergence_uniform = function(object, Hs=NULL) { if (is.null(Hs)) {Hs = object@H} @@ -31,7 +32,7 @@ kl_divergence_uniform = function(object, Hs=NULL) dataset_list = list() for (i in 1:length(Hs)) { scaled = scale(Hs[[i]], center=FALSE, scale=TRUE) - + inflated = t(apply(scaled, 1, function(x) { replace(x, x == 0, 1e-20) })) @@ -42,11 +43,11 @@ kl_divergence_uniform = function(object, Hs=NULL) return(dataset_list) } -# Function takes in a list of DGEs, with gene rownames and cell colnames, +# Function takes in a list of DGEs, with gene rownames and cell colnames, # and merges them into a single DGE. # Also adds library.names to cell.names if expected to be overlap (common with 10X barcodes) MergeSparseDataAll <- function(datalist, library.names = NULL) { - + # Use summary to convert the sparse matrices into three-column indexes where i are the # row numbers, j are the column numbers, and x are the nonzero entries col_offset <- 0 @@ -55,11 +56,11 @@ MergeSparseDataAll <- function(datalist, library.names = NULL) { for (i in 1:length(datalist)) { curr <- datalist[[i]] curr_s <- summary(curr) - + # Now, alter the indexes so that the two 3-column matrices can be properly merged. # First, make the current and full column numbers non-overlapping. curr_s[, 2] <- curr_s[, 2] + col_offset - + # Update full cell names if (!is.null(library.names)) { cellnames <- paste0(library.names[i], "_", colnames(curr)) @@ -67,13 +68,13 @@ MergeSparseDataAll <- function(datalist, library.names = NULL) { cellnames <- colnames(curr) } allCells <- c(allCells, cellnames) - + # Next, change the row (gene) indexes so that they index on the union of the gene sets, # so that proper merging can occur. idx <- match(rownames(curr), allGenes) newgenescurr <- idx[curr_s[, 1]] curr_s[, 1] <- newgenescurr - + # Now bind the altered 3-column matrices together, and convert into a single sparse matrix. if (!exists("full_mat")) { full_mat <- curr_s @@ -120,7 +121,7 @@ sparse.var = function(x){ sparse.transpose = function(x){ h = summary(x) sparseMatrix(i = h[,2],j=h[,1],x=h[,3]) - + } # After running modularity clustering, assign singleton communities to the mode of the cluster @@ -137,8 +138,6 @@ assign.singletons <- function(object, idents, k.use = 15, center = FALSE) { )) } -#' @importFrom FNN get.knnx -# assign.singletons.list <- function(object, idents, k.use = 15, center = FALSE) { if (!is.list(x = object) || !all(sapply(X = object, FUN = is.matrix))) { stop("'assign.singletons.list' expects a list of matrices") @@ -187,20 +186,20 @@ assign.singletons.list <- function(object, idents, k.use = 15, center = FALSE) { return(idents) } -# Run modularity based clustering on edge file -SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R=1, - modularity=1, ModularityJarFile="",random.seed=1, +# Run modularity based clustering on edge file +SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R=1, + modularity=1, ModularityJarFile="",random.seed=1, id.number=NULL, print.mod=F) { - + # Prepare data for modularity based clustering edge = edge[which(edge[,3]>prune.thresh),] - + message("making edge file.") edge_file <- paste0("edge", id.number, fileext=".txt") # Make sure no scientific notation written to edge file # restore default settings when the current function exits - init_option <- options() + init_option <- options() on.exit(options(init_option)) saveScipen=options(scipen=10000)[[1]] @@ -213,7 +212,7 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= } liger.dir <- system.file(package = "rliger") ModularityJarFile <- paste0(liger.dir, "/java/ModularityOptimizer.jar") - command <- paste("java -jar", ModularityJarFile, edge_file, output_file, modularity, R, + command <- paste("java -jar", ModularityJarFile, edge_file, output_file, modularity, R, algorithm, nstart, iter.max, random.seed, as.numeric(print.mod), sep = " ") message ("Starting SLM") @@ -223,7 +222,7 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= } unlink(edge_file) ident.use <- factor(read.table(file = output_file, header = FALSE, sep = "\t")[, 1]) - + return(ident.use) } @@ -232,7 +231,7 @@ scaleL2norm <- function(x) { return(x / sqrt(sum(x^2))) } -# get mode of identities +# get mode of identities getMode <- function(x, na.rm = FALSE) { if(na.rm){ x = x[!is.na(x)] @@ -242,8 +241,8 @@ getMode <- function(x, na.rm = FALSE) { } # utility function for seuratToLiger function -# Compares colnames in reference matrix1 and adds back any missing -# column names to matrix.subset as rows +# Compares colnames in reference matrix1 and adds back any missing +# column names to matrix.subset as rows # Set transpose = TRUE if rownames of matrix1 should be referenced addMissingCells <- function(matrix1, matrix.subset, transpose = F) { if (transpose) { @@ -264,15 +263,15 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' Get gene expression values from list of expression matrices. #' #' @description -#' Returns single vector of gene values across all datasets in list provided. Data can be in raw, -#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. +#' Returns single vector of gene values across all datasets in list provided. Data can be in raw, +#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. #' #' @param list List of gene x cell (or cell x gene) matrices #' @param gene Gene for which to return values (if gene is not found in appropriate dimnames will #' return vector of NA). -#' @param use.cols Whether to query columns for desired gene (set to TRUE if matrices are cell x +#' @param use.cols Whether to query columns for desired gene (set to TRUE if matrices are cell x #' gene) (default FALSE). -#' @param methylation.indices Indices of datasets with methylation data (never log2scaled) +#' @param methylation.indices Indices of datasets with methylation data (never log2scaled) #' (default NULL). #' @param log2scale Whether to log2+1 scale (with multiplicative factor) values (default FALSE). #' @param scale.factor Scale factor to use with log2 scaling (default 10000). @@ -280,19 +279,14 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' @return Plots to console (1-2 pages per factor) #' @export #' @examples -#' \dontrun{ -#' # liger object with factorization complete -#' # ligerex -#' gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') -#' } - +#' NKG7 <- getGeneValues(list(ctrl = ctrl, stim = stim), "NKG7") getGeneValues <- function(list, gene, use.cols = FALSE, methylation.indices = NULL, log2scale = FALSE, scale.factor = 10000) { gene_vals <- unlist(lapply(seq_along(list), function(i) { mtx <- unname(list)[[i]] if (use.cols) { mtx <- t(mtx) - } + } if (gene %in% rownames(mtx)) { gene_vals_int <- mtx[gene, ] } else { @@ -301,7 +295,7 @@ getGeneValues <- function(list, gene, use.cols = FALSE, methylation.indices = NU } if (log2scale & !(i %in% methylation.indices)) { gene_vals_int <- log2(scale.factor * gene_vals_int + 1) - } + } return(gene_vals_int) }), use.names = TRUE) @@ -331,7 +325,6 @@ refine_clusts_knn = function(H,clusts,k,eps=0.1) ################################## For fast Wilcoxon test ################################ # helper function for wilcoxon tests on general variables like matrix and dgCMatrix # related to function runWilcoxon -#' @importFrom stats p.adjust wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { ## Check and possibly correct input values if (is(X, 'dgeMatrix')) X <- as.matrix(X) @@ -347,22 +340,22 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { y <- y[idx_use] X <- X[, idx_use] } - - + + y <- factor(y) idx_use <- which(!is.na(y)) if (length(idx_use) < length(y)) { y <- y[idx_use] X <- X[, idx_use] - if (verbose) - message('Removing NA values from labels') + if (verbose) + message('Removing NA values from labels') } - + group.size <- as.numeric(table(y)) if (length(group.size[group.size > 0]) < 2) { stop('Must have at least 2 groups defined.') } - + # features_use <- which(apply(!is.na(X), 1, all)) # if (verbose & length(features_use) < nrow(X)) { # message('Removing features with NA values') @@ -371,27 +364,27 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { if (is.null(row.names(X))) { row.names(X) <- paste0('Feature', seq_len(nrow(X))) } - + ## Compute primary statistics group.size <- as.numeric(table(y)) n1n2 <- group.size * (ncol(X) - group.size) if (is(X, 'dgCMatrix')) { - rank_res <- rank_matrix(Matrix::t(X)) + rank_res <- rank_matrix(Matrix::t(X)) } else { rank_res <- rank_matrix(X) } - - ustat <- compute_ustat(rank_res$X_ranked, y, n1n2, group.size) + + ustat <- compute_ustat(rank_res$X_ranked, y, n1n2, group.size) auc <- t(ustat / n1n2) - pvals <- compute_pval(ustat, rank_res$ties, ncol(X), n1n2) + pvals <- compute_pval(ustat, rank_res$ties, ncol(X), n1n2) fdr <- apply(pvals, 2, function(x) p.adjust(x, 'BH')) - + ### Auxiliary Statistics (AvgExpr, PctIn, LFC, etc) group_sums <- sumGroups(X, y, 1) group_nnz <- nnzeroGroups(X, y, 1) group_pct <- sweep(group_nnz, 1, as.numeric(table(y)), "/") %>% t() - group_pct_out <- -group_nnz %>% - sweep(2, colSums(group_nnz) , "+") %>% + group_pct_out <- -group_nnz %>% + sweep(2, colSums(group_nnz) , "+") %>% sweep(1, as.numeric(length(y) - table(y)), "/") %>% t() group_means <- sweep(group_sums, 1, as.numeric(table(y)), "/") %>% t() cs <- colSums(group_sums) @@ -399,13 +392,13 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { lfc <- Reduce(cbind, lapply(seq_len(length(levels(y))), function(g) { group_means[, g] - ((cs - group_sums[g, ]) / (length(y) - gs[g])) })) - - res_list <- list(auc = auc, + + res_list <- list(auc = auc, pval = pvals, - padj = fdr, - pct_in = 100 * group_pct, + padj = fdr, + pct_in = 100 * group_pct, pct_out = 100 * group_pct_out, - avgExpr = group_means, + avgExpr = group_means, statistic = t(ustat), logFC = lfc) return(tidy_results(res_list, row.names(X), levels(y))) @@ -413,20 +406,20 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { tidy_results <- function(wide_res, features, groups) { - res <- Reduce(cbind, lapply(wide_res, as.numeric)) %>% data.frame() + res <- Reduce(cbind, lapply(wide_res, as.numeric)) %>% data.frame() colnames(res) <- names(wide_res) res$feature <- rep(features, times = length(groups)) res$group <- rep(groups, each = length(features)) res %>% dplyr::select( - .data$feature, - .data$group, - .data$avgExpr, - .data$logFC, - .data$statistic, - .data$auc, - .data$pval, - .data$padj, - .data$pct_in, + .data$feature, + .data$group, + .data$avgExpr, + .data$logFC, + .data$statistic, + .data$auc, + .data$pval, + .data$padj, + .data$pct_in, .data$pct_out ) } @@ -434,12 +427,12 @@ tidy_results <- function(wide_res, features, groups) { compute_ustat <- function(Xr, cols, n1n2, group.size) { grs <- sumGroups(Xr, cols) - + if (is(Xr, 'dgCMatrix')) { gnz <- (group.size - nnzeroGroups(Xr, cols)) zero.ranks <- (nrow(Xr) - diff(Xr@p) + 1) / 2 ustat <- t((t(gnz) * zero.ranks)) + grs - group.size * - (group.size + 1 ) / 2 + (group.size + 1 ) / 2 } else { ustat <- grs - group.size * (group.size + 1 ) / 2 } @@ -457,71 +450,66 @@ compute_pval <- function(ustat, ties, N, n1n2) { }) %>% unlist usigma <- sqrt(matrix(n1n2, ncol = 1) %*% matrix(rhs, nrow = 1)) z <- t(z / usigma) - + pvals <- matrix(2 * pnorm(-abs(as.numeric(z))), ncol = ncol(z)) return(pvals) } #' rank_matrix -#' +#' #' Utility function to rank columns of matrix -#' -#' @param X feature by observation matrix. -#' +#' +#' @param X feature by observation matrix. +#' #' @return List with 2 items - - +#' @noRd rank_matrix <- function(X) { UseMethod('rank_matrix') } -##' @rdname rank_matrix rank_matrix.dgCMatrix <- function(X) { Xr <- Matrix(X, sparse = TRUE) ties <- cpp_rank_matrix_dgc(Xr@x, Xr@p, nrow(Xr), ncol(Xr)) return(list(X_ranked = Xr, ties = ties)) } -##' @rdname rank_matrix rank_matrix.matrix <- function(X) { cpp_rank_matrix_dense(X) } #' sumGroups -#' +#' #' Utility function to sum over group labels -#' +#' #' @param X matrix #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) -#' +#' #' @return Matrix of groups by features - +#' @noRd sumGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') } else if (MARGIN == 1 & ncol(X) != length(y)) { - stop('wrong dims') + stop('wrong dims') } UseMethod('sumGroups') } -##' @rdname sumGroups sumGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_sumGroups_dgc_T(X@x, X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, - length(unique(y))) + length(unique(y))) } else { cpp_sumGroups_dgc(X@x, X@p, X@i, ncol(X), as.integer(y) - 1, length(unique(y))) } } -##' @rdname sumGroups sumGroups.matrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { - cpp_sumGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) + cpp_sumGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) } else { cpp_sumGroups_dense(X, as.integer(y) - 1, length(unique(y))) } @@ -530,39 +518,37 @@ sumGroups.matrix <- function(X, y, MARGIN=2) { #' nnzeroGroups -#' +#' #' Utility function to compute number of zeros-per-feature within group -#' +#' #' @param X matrix #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) -#' +#' #' @return Matrix of groups by features - +#' @noRd nnzeroGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') } else if (MARGIN == 1 & ncol(X) != length(y)) { - stop('wrong dims') + stop('wrong dims') } UseMethod('nnzeroGroups') } -##' @rdname nnzeroGroups nnzeroGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_nnzeroGroups_dgc_T(X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, - length(unique(y))) + length(unique(y))) } else { cpp_nnzeroGroups_dgc(X@p, X@i, ncol(X), as.integer(y) - 1, length(unique(y))) } } -##' @rdname nnzeroGroups nnzeroGroups.matrix <- function(X, y, MARGIN=2) { - if (MARGIN == 1) { - cpp_nnzeroGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) + if (MARGIN == 1) { + cpp_nnzeroGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) } else { cpp_nnzeroGroups_dense(X, as.integer(y) - 1, length(unique(y))) } @@ -651,7 +637,6 @@ nmf_hals <- function(A, k, max_iters = 500, thresh = 1e-4, reps = 20, W0 = NULL, # commit d2cf403 on Feb 8, 2019 # #' @importFrom utils file_test -# fftRtsne <- function(X, dims = 2, perplexity = 30, diff --git a/README.md b/README.md index 40da63df..306c9be0 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ LIGER (installed as `rliger` ) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. -Check out our [Cell paper](https://www.cell.com/cell/fulltext/S0092-8674%2819%2930504-5) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our [study](https://portals.broadinstitute.org/single_cell/study/SCP466) on the -Single Cell Portal. +Check out our [Cell paper](https://doi.org/10.1016/j.cell.2019.05.006) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our study "SCP466" on the +[Single Cell Portal](https://singlecell.broadinstitute.org/single_cell). LIGER can be used to compare and contrast experimental datasets in a variety of contexts, for instance: @@ -72,8 +72,8 @@ Before setting up the `rliger` package, users should have R version 3.4.0 or hig LIGER is written in R and is also available on the Comprehensive R Archive Network (CRAN). Note that the package name is `rliger` to avoid a naming conflict with an unrelated package. To install the version on CRAN, follow these instructions: -1. Install [R](https://www.r-project.org/) (>= 3.4) -2. Install [Rstudio](https://rstudio.com/products/rstudio/download/) (recommended) +1. Install [R](https://www.r-project.org/) (>= 3.6) +2. Install [Rstudio](https://posit.co/download/rstudio-desktop/) (recommended) 3. Type the following R command: ``` install.packages('rliger') diff --git a/data/ctrl.rda b/data/ctrl.rda new file mode 100644 index 00000000..da53bfde Binary files /dev/null and b/data/ctrl.rda differ diff --git a/data/stim.rda b/data/stim.rda new file mode 100644 index 00000000..36ec017a Binary files /dev/null and b/data/stim.rda differ diff --git a/man/calcARI.Rd b/man/calcARI.Rd index 222711f6..eb1bc6d3 100644 --- a/man/calcARI.Rd +++ b/man/calcARI.Rd @@ -22,17 +22,12 @@ The Rand index ranges from 0 to 1, with 0 indicating no agreement between cluste indicating perfect agreement. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -names(cluster2) <- colnames(ligerex@raw.data[[2]]) -# get ARI for first clustering -ari1 <- calcARI(ligerex, cluster1) -# get ARI for second clustering -ari2 <- calcARI(ligerex, cluster2) -} +agreement <- calcARI(ligerex, ligerex@clusters) } diff --git a/man/calcAgreement.Rd b/man/calcAgreement.Rd index 1ce3ad5a..22d338ac 100644 --- a/man/calcAgreement.Rd +++ b/man/calcAgreement.Rd @@ -49,14 +49,12 @@ expected to be most similar to iNMF. Although agreement can theoretically approa it is usually no higher than 0.2-0.3 (particularly for non-deterministic approaches like NMF). } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -# generate H.norm by quantile normalizig factor loadings +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -agreement <- calcAgreement(ligerex, dr.method = "NMF") -# ligerex (liger object based on datasets in HDF5 format), factorization complete -ligerex <- quantile_norm(ligerex) -ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) -agreement <- calcAgreement(ligerex, dr.method = "NMF") -} +agreement <- calcAgreement(ligerex) } diff --git a/man/calcAlignment.Rd b/man/calcAlignment.Rd index 4b7bc143..bba763a7 100644 --- a/man/calcAlignment.Rd +++ b/man/calcAlignment.Rd @@ -48,9 +48,12 @@ value for perfectly mixed datasets, and scale the value from 0 to 1. Note that i alignment can be greater than 1 occasionally. } \examples{ -\dontrun{ -# ligerex (liger object ), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -alignment <- calcAlignment(ligerex) -} +agreement <- calcAlignment(ligerex) } diff --git a/man/calcAlignmentPerCluster.Rd b/man/calcAlignmentPerCluster.Rd index 22890873..430b931f 100644 --- a/man/calcAlignmentPerCluster.Rd +++ b/man/calcAlignmentPerCluster.Rd @@ -7,7 +7,7 @@ calcAlignmentPerCluster(object, rand.seed = 1, k = NULL, by.dataset = FALSE) } \arguments{ -\item{object}{\code{liger} object. Should call quantileAlignSNF before calling.} +\item{object}{\code{liger} object. Should call quantile_norm before calling.} \item{rand.seed}{Random seed for reproducibility (default 1).} @@ -23,10 +23,12 @@ Vector of alignment statistics (with names of clusters). Returns alignment for each cluster in analysiss (see documentation for calcAlignment). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# get alignment for each cluster -alignment_per_cluster <- calcAlignmentPerCluster(ligerex) -} +agreement <- calcAlignmentPerCluster(ligerex) } diff --git a/man/calcDatasetSpecificity.Rd b/man/calcDatasetSpecificity.Rd index cc7f39a4..3ff1f6eb 100644 --- a/man/calcDatasetSpecificity.Rd +++ b/man/calcDatasetSpecificity.Rd @@ -32,10 +32,11 @@ calculate the norm of the sum of each factor's shared loadings (W) and dataset-s description. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings -ligerex <- quantile_norm(ligerex) -dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +calcDatasetSpecificity(ligerex) } diff --git a/man/calcNormLoadings.Rd b/man/calcNormLoadings.Rd index fcd39da9..7979a887 100644 --- a/man/calcNormLoadings.Rd +++ b/man/calcNormLoadings.Rd @@ -15,3 +15,13 @@ A dataframe, such that each column represents the contribution of a specific mat \description{ Calculates the contribution of each factor of W,V, and U to the reconstruction. } +\examples{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +calcNormLoadings(ligerex) +} diff --git a/man/calcPurity.Rd b/man/calcPurity.Rd index 4a466d5e..8d9d9ddb 100644 --- a/man/calcPurity.Rd +++ b/man/calcPurity.Rd @@ -23,17 +23,12 @@ subgroups or clusters than the true clusters (or classes). Purity also ranges fr with a score of 1 representing a pure, or accurate, clustering. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -names(cluster2) <- colnames(ligerex@raw.data[[2]]) -# get ARI for first clustering -ari1 <- calcPurity(ligerex, cluster1) -# get ARI for second clustering -ari2 <- calcPurity(ligerex, cluster2) -} +agreement <- calcARI(ligerex, ligerex@clusters) } diff --git a/man/convertOldLiger.Rd b/man/convertOldLiger.Rd index 88aa09f4..2de152b6 100644 --- a/man/convertOldLiger.Rd +++ b/man/convertOldLiger.Rd @@ -24,8 +24,7 @@ class NULL. } \examples{ \dontrun{ -# analogy (old Analogizer object) -# convert to latest class definition +# Not able to generate old object from current version, thus not run ligerex <- convertOldLiger(analogy) } } diff --git a/man/createLiger.Rd b/man/createLiger.Rd index c1719a28..e0442c8e 100644 --- a/man/createLiger.Rd +++ b/man/createLiger.Rd @@ -51,8 +51,5 @@ By default, it converts all passed data into sparse matrices (dgCMatrix) to redu It initializes cell.data with nUMI and nGene calculated for every cell. } \examples{ -# Demonstration using matrices with randomly generated numbers -Y <- matrix(runif(5000,0,2), 10,500) -Z <- matrix(runif(5000,0,2), 10,500) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) } diff --git a/man/getFactorMarkers.Rd b/man/getFactorMarkers.Rd index 84a47632..317b6a9d 100644 --- a/man/getFactorMarkers.Rd +++ b/man/getFactorMarkers.Rd @@ -53,10 +53,11 @@ factorization, before selecting those which load most significantly on each fact or dataset-specific way). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input -markers <- getFactorMarkers(ligerex, num.genes = 10) -# look at shared markers -head(markers[[2]]) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +fm <- getFactorMarkers(ligerex, dataset1 = "stim", dataset2 = "ctrl") } diff --git a/man/getGeneValues.Rd b/man/getGeneValues.Rd index cc72db70..941850f1 100644 --- a/man/getGeneValues.Rd +++ b/man/getGeneValues.Rd @@ -19,10 +19,10 @@ getGeneValues( \item{gene}{Gene for which to return values (if gene is not found in appropriate dimnames will return vector of NA).} -\item{use.cols}{Whether to query columns for desired gene (set to TRUE if matrices are cell x +\item{use.cols}{Whether to query columns for desired gene (set to TRUE if matrices are cell x gene) (default FALSE).} -\item{methylation.indices}{Indices of datasets with methylation data (never log2scaled) +\item{methylation.indices}{Indices of datasets with methylation data (never log2scaled) (default NULL).} \item{log2scale}{Whether to log2+1 scale (with multiplicative factor) values (default FALSE).} @@ -33,13 +33,9 @@ gene) (default FALSE).} Plots to console (1-2 pages per factor) } \description{ -Returns single vector of gene values across all datasets in list provided. Data can be in raw, +Returns single vector of gene values across all datasets in list provided. Data can be in raw, normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. } \examples{ -\dontrun{ -# liger object with factorization complete -# ligerex -gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') -} +NKG7 <- getGeneValues(list(ctrl = ctrl, stim = stim), "NKG7") } diff --git a/man/getProportionMito.Rd b/man/getProportionMito.Rd index 67e7e9e5..93737a8d 100644 --- a/man/getProportionMito.Rd +++ b/man/getProportionMito.Rd @@ -4,12 +4,15 @@ \alias{getProportionMito} \title{Calculate proportion mitochondrial contribution} \usage{ -getProportionMito(object, use.norm = FALSE) +getProportionMito(object, use.norm = FALSE, mito.pattern = "^mt-") } \arguments{ \item{object}{\code{liger} object.} \item{use.norm}{Whether to use cell normalized data in calculating contribution (default FALSE).} + +\item{mito.pattern}{Regex pattern for identifying mitochondrial genes. Default "^mt-" typically goes for mouse. +May use "^MT-" for human.} } \value{ Named vector containing proportion of mitochondrial contribution for each cell. @@ -18,8 +21,7 @@ Named vector containing proportion of mitochondrial contribution for each cell. Calculates proportion of mitochondrial contribution based on raw or normalized data. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +# Expect a warning because the test data does not contain mito genes +ligerex@cell.data$mito <- getProportionMito(ligerex, mito.pattern = "^MT-") } diff --git a/man/imputeKNN.Rd b/man/imputeKNN.Rd index 861fb3fd..dcbfbf8d 100644 --- a/man/imputeKNN.Rd +++ b/man/imputeKNN.Rd @@ -40,6 +40,7 @@ Impute query features from a reference dataset using KNN. } \examples{ \dontrun{ +# Only runable for ATAC dataset. See tutorial on GitHub. # ligerex (liger object), factorization complete # impute every dataset other than the reference dataset ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) diff --git a/man/liger-class.Rd b/man/liger-class.Rd index 980ab98f..f9810c9c 100644 --- a/man/liger-class.Rd +++ b/man/liger-class.Rd @@ -23,8 +23,9 @@ The key slots used in the liger object are described below. \item{\code{scale.data}}{List of scaled matrices (cells by genes)} -\item{\code{sample.data}}{List of sampled matrices (gene by cells) -#' @slot scale.unshared.data List of scaled matrices of unshared features} +\item{\code{sample.data}}{List of sampled matrices (gene by cells)} + +\item{\code{scale.unshared.data}}{List of scaled matrices of unshared features} \item{\code{h5file.info}}{List of HDF5-related information for each input dataset. Paths to raw data, indices, indptr, barcodes, genes and the pipeline through which the HDF5 file is formated (10X, AnnData, etc), @@ -34,8 +35,9 @@ type of sampled data (raw, normalized or scaled).} cells across all datasets)} \item{\code{var.genes}}{Subset of informative genes shared across datasets to be used in matrix -factorization -#' @slot var.unshared.features Highly variable unshared features selected from each dataset} +factorization} + +\item{\code{var.unshared.features}}{Highly variable unshared features selected from each dataset} \item{\code{H}}{Cell loading factors (one matrix per dataset, dimensions cells by k)} diff --git a/man/liger-demodata.Rd b/man/liger-demodata.Rd new file mode 100644 index 00000000..1c21eb8e --- /dev/null +++ b/man/liger-demodata.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{ctrl} +\alias{ctrl} +\alias{stim} +\title{dgCMatrix object of PBMC subsample data with Control and Stimulated datasets} +\format{ +\code{dgCMatrix} object of gene expression matrix from PBMC study, + named by "ctrl" and "stim". + +An object of class \code{dgCMatrix} with 262 rows and 300 columns. +} +\source{ +https://www.nature.com/articles/nbt.4042 +} +\usage{ +ctrl + +stim +} +\description{ +dgCMatrix object of PBMC subsample data with Control and Stimulated datasets +} +\references{ +Hyun Min Kang and et. al., Nature Biotechnology, 2018 +} +\keyword{datasets} diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd index 4129fa3b..fae2ebc9 100644 --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -6,10 +6,11 @@ \usage{ ligerToSeurat( object, - nms = names(object@H), - renormalize = TRUE, + nms = NULL, + renormalize = FALSE, use.liger.genes = TRUE, - by.dataset = FALSE + by.dataset = FALSE, + assay = "RNA" ) } \arguments{ @@ -19,11 +20,13 @@ ligerToSeurat( different datasets which may have same name). Other names can be passed here as vector, must have same length as the number of datasets. (default names(H))} -\item{renormalize}{Whether to log-normalize raw data using Seurat defaults (default TRUE).} +\item{renormalize}{Whether to log-normalize raw data using Seurat defaults (default FALSE).} \item{use.liger.genes}{Whether to carry over variable genes (default TRUE).} \item{by.dataset}{Include dataset of origin in cluster identity in Seurat object (default FALSE).} + +\item{assay}{Assay name to set in the Seurat object (default "RNA").} } \value{ Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. @@ -37,8 +40,10 @@ Stores original dataset identity by default in new object metadata if dataset na in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets ONLY), factorization complete input -s.object <- ligerToSeurat(ligerex) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { + # 1.6.2 is not compatible thus don't test + # but can use `setOldClass("mMatrix")` as a hack + srt <- ligerToSeurat(ligerex) } } diff --git a/man/linkGenesAndPeaks.Rd b/man/linkGenesAndPeaks.Rd index f09687b0..16b16612 100644 --- a/man/linkGenesAndPeaks.Rd +++ b/man/linkGenesAndPeaks.Rd @@ -42,8 +42,9 @@ Evaluate the relationships between pairs of genes and peaks based on specified d } \examples{ \dontrun{ +# Only runable for ATAC datasets, see tutorial on GitHub # some gene counts matrix: gmat.small -# some peak counts matrix: pmat.small +# some peak counts matrix: pmat.small regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", alpha = 0.05, path_to_coords = 'some_path') } diff --git a/man/louvainCluster.Rd b/man/louvainCluster.Rd index ab533b34..ef9936a1 100644 --- a/man/louvainCluster.Rd +++ b/man/louvainCluster.Rd @@ -52,9 +52,11 @@ for community detection, which is widely used in single-cell analysis and excels small clusters into broad cell classes. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -ligerex <- louvainCluster(ligerex, resulotion = 0.3) -} - +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex, resolution = 0.3) } diff --git a/man/makeInteractTrack.Rd b/man/makeInteractTrack.Rd index 96faaeaa..552f8456 100644 --- a/man/makeInteractTrack.Rd +++ b/man/makeInteractTrack.Rd @@ -25,7 +25,8 @@ Export the predicted gene-pair interactions calculated by upstream function 'lin } \examples{ \dontrun{ -# some gene-peak correlation matrix: regent +# Only runable for ATAC datasets, see tutorial on GitHub +# some gene-peak correlation matrix: regent makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') } } diff --git a/man/makeRiverplot.Rd b/man/makeRiverplot.Rd index 4b1b0b3e..6eba96d9 100644 --- a/man/makeRiverplot.Rd +++ b/man/makeRiverplot.Rd @@ -57,7 +57,7 @@ between the nodes (default 0.1).} By default will try to automatically order them appropriately.} } \value{ -A riverplot object +NULL for now. Could be back if CRAN dependency riverplot is back. } \description{ Creates a riverplot to show how separate cluster assignments from two datasets map onto a @@ -66,6 +66,7 @@ can also be passed in. Uses the riverplot package to construct riverplot object } \examples{ \dontrun{ +# Riverplot currently archived, cannot run this example # ligerex (liger object), factorization complete input # toy clusters cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) diff --git a/man/nnzeroGroups.Rd b/man/nnzeroGroups.Rd deleted file mode 100644 index 1844d764..00000000 --- a/man/nnzeroGroups.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{nnzeroGroups} -\alias{nnzeroGroups} -\alias{nnzeroGroups.dgCMatrix} -\alias{nnzeroGroups.matrix} -\title{nnzeroGroups} -\usage{ -nnzeroGroups(X, y, MARGIN = 2) - -\method{nnzeroGroups}{dgCMatrix}(X, y, MARGIN = 2) - -\method{nnzeroGroups}{matrix}(X, y, MARGIN = 2) -} -\arguments{ -\item{X}{matrix} - -\item{y}{group labels} - -\item{MARGIN}{whether observations are rows (=2) or columns (=1)} -} -\value{ -Matrix of groups by features -} -\description{ -Utility function to compute number of zeros-per-feature within group -} diff --git a/man/nonneg.Rd b/man/nonneg.Rd deleted file mode 100644 index 9b82c0b2..00000000 --- a/man/nonneg.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rliger.R -\name{nonneg} -\alias{nonneg} -\title{Perform thresholding on dense matrix} -\usage{ -nonneg(x, eps = 1e-16) -} -\arguments{ -\item{x}{Dense matrix.} - -\item{eps}{Threshold. Should be a small positive value. (default 1e-16)} -} -\value{ -Dense matrix with smallest values equal to eps. -} -\description{ -Perform thresholding on the input dense matrix. Remove any values samller than eps by eps. -Helper function for online_iNMF -} diff --git a/man/normalize.Rd b/man/normalize.Rd index daa761c7..8cc22440 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -32,9 +32,6 @@ dataset) (default TRUE).} This function normalizes data to account for total gene expression across a cell. } \examples{ -# Demonstration using matrices with randomly generated numbers -Y <- matrix(runif(5000,0,2), 10,500) -Z <- matrix(runif(5000,0,2), 10,500) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) } diff --git a/man/online_iNMF.Rd b/man/online_iNMF.Rd index ccb0f1e4..cb974a0a 100644 --- a/man/online_iNMF.Rd +++ b/man/online_iNMF.Rd @@ -79,10 +79,13 @@ W is identical among all datasets, as it represents the shared components of the across datasets. The V matrices represent the dataset-specific components of the metagenes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# Get factorization using 20 factors and mini-batch of 5000 cells -# (default setting, can be adjusted for ideal results) -ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (length(ligerex@h5file.info) > 0) { + # This function only works for HDF5 based liger object + ligerex <- normalize(ligerex) + ligerex <- selectGenes(ligerex) + ligerex <- scaleNotCenter(ligerex) + # `miniBatch_size` has to be no larger than the number of cells in the smallest dataset + ligerex <- online_iNMF(ligerex, miniBatch_size = 100) } } diff --git a/man/optimizeALS.Rd b/man/optimizeALS.Rd index 64438eaf..d027050f 100644 --- a/man/optimizeALS.Rd +++ b/man/optimizeALS.Rd @@ -19,7 +19,6 @@ optimizeALS(object, ...) W.init = NULL, V.init = NULL, use.unshared = FALSE, - lamda.u = NULL, rand.seed = 1, print.obj = FALSE, verbose = TRUE, @@ -72,6 +71,8 @@ factorizations of the same dataset can be run with one rep if necessary. (defaul \item{V.init}{Initial values to use for V matrices (default NULL)} +\item{use.unshared}{Whether to run UANLS method to integrate datasets with previously identified unshared variable genes. Have to run selectGenes with unshared = TRUE and scaleNotCenter it. (default FALSE).} + \item{rand.seed}{Random seed to allow reproducible results (default 1).} \item{print.obj}{Print objective function values after convergence (default FALSE).} @@ -93,10 +94,10 @@ W is held consistent among all datasets, as it represents the shared components across datasets. The V matrices represent the dataset-specific components of the metagenes. } \examples{ -\dontrun{ -# Requires preprocessed liger object (only for objected not based on HDF5 files) -# Get factorization using 20 factors and mini-batch of 5000 cells -# (default setting, can be adjusted for ideal results) -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) } diff --git a/man/optimizeNewData.Rd b/man/optimizeNewData.Rd index e8877d1b..c25aa412 100644 --- a/man/optimizeNewData.Rd +++ b/man/optimizeNewData.Rd @@ -45,16 +45,28 @@ Uses an efficient strategy for updating that takes advantage of the information factorization. Assumes that selected genes (var.genes) are represented in the new datasets. } \examples{ -\dontrun{ -# Given preprocessed liger object: ligerex (contains two datasets Y and Z) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -# acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets -new_data <- list(Y_set = Y_new, Z_set = Z_new) -ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example test time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +# Suppose we have new data, namingly Y_new and Z_new from the same cell type. +# Add it to existing datasets. +new_data <- list(Y_set = ctrl, Z_set = stim) +# 2 iters do not lead to converge, it's for minimal test time +ligerex2 <- optimizeNewData(ligerex, new.data = new_data, + which.datasets = list('ctrl', 'stim'), + max.iters = 1) # acquire new data from different cell type (X), we'll just add another dataset -# it's probably most similar to y_set -ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), - add.to.existing = FALSE) +# it's probably most similar to ctrl +X <- ctrl +# 2 iters do not lead to converge, it's for minimal test time +ligerex3 <- optimizeNewData(ligerex, new.data = list(x_set = X), + which.datasets = list('ctrl'), + add.to.existing = FALSE, + max.iters = 1) } } diff --git a/man/optimizeNewK.Rd b/man/optimizeNewK.Rd index 3279c5e7..047727a3 100644 --- a/man/optimizeNewK.Rd +++ b/man/optimizeNewK.Rd @@ -40,8 +40,14 @@ existing factorization. It is most recommended for values of k smaller than curr where it is more likely to speed up the factorization. } \examples{ -\dontrun{ -# decide to run with k = 15 instead (keeping old lambda the same) -ligerex <- optimizeNewK(ligerex, k.new = 15) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +k <- 5 +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = k, max.iters = 1) +if (k != 5) { + ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 1) } } diff --git a/man/optimizeNewLambda.Rd b/man/optimizeNewLambda.Rd index dd6f1f87..c30b0198 100644 --- a/man/optimizeNewLambda.Rd +++ b/man/optimizeNewLambda.Rd @@ -36,8 +36,15 @@ factorization; uses previous k. Recommended mainly when re-optimizing for higher new lambda value is significantly different; otherwise may not return optimal results. } \examples{ -\dontrun{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example run time, not converging. +ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 1) # decide to run with lambda = 15 instead (keeping k the same) -ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) +ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 1) } } diff --git a/man/optimizeSubset.Rd b/man/optimizeSubset.Rd index 7cc719ec..58d170c4 100644 --- a/man/optimizeSubset.Rd +++ b/man/optimizeSubset.Rd @@ -42,9 +42,20 @@ factorization. Can use either cell names or cluster names to subset. For more ba functionality (without automatic optimization), see subsetLiger. } \examples{ -\dontrun{ -# now want to look at only subset of data -# Requires a vector of cell names from data 1 and a vector of cell names from data 2 -ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example run time, not converging. +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +# Preparing subset with random sampling. +# Subset can also be obtained with prior knowledge from metadata. +cell_names_1 <- sample(rownames(ligerex@H[[1]]), 20) +cell_names_2 <- sample(rownames(ligerex@H[[2]]), 20) + +ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2), + max.iters = 1) } } diff --git a/man/optimize_UANLS.Rd b/man/optimize_UANLS.Rd deleted file mode 100644 index 7e2973e5..00000000 --- a/man/optimize_UANLS.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rliger.R -\name{optimize_UANLS} -\alias{optimize_UANLS} -\title{Perform iNMF on scaled datasets, and include unshared, scaled and normalized, features} -\usage{ -optimize_UANLS( - object, - k = 30, - lambda = 5, - max.iters = 30, - nrep = 1, - thresh = 1e-10, - rand.seed = 1, - print.obj = FALSE, - vectorized.lambda = FALSE -) -} -\arguments{ -\item{object}{\code{liger} object. Should normalize, select genes, and scale before calling.} - -\item{k}{Inner dimension of factorization (number of factors).} - -\item{lambda}{The lambda penalty. Default 5} - -\item{max.iters}{Maximum number of block coordinate descent iterations to perform (default 30).} - -\item{nrep}{Number of restarts to perform (iNMF objective function is non-convex, so taking the -best objective from multiple successive initializations is recommended). For easier -reproducibility, this increments the random seed by 1 for each consecutive restart, so future -factorizations of the same dataset can be run with one rep if necessary. (default 1)} - -\item{thresh}{Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh. -(default 1e-6)} - -\item{rand.seed}{Random seed to allow reproducible results (default 1).} - -\item{print.obj}{Print objective function values after convergence (default FALSE).} - -\item{vectorized.lamba}{Whether or not to expect a vectorized lambda parameter -##########################################################################} -} -\description{ -Perform iNMF on scaled datasets, and include unshared, scaled and normalized, features -} diff --git a/man/plotByDatasetAndCluster.Rd b/man/plotByDatasetAndCluster.Rd index 778d0cf1..07ef9347 100644 --- a/man/plotByDatasetAndCluster.Rd +++ b/man/plotByDatasetAndCluster.Rd @@ -53,7 +53,7 @@ one after the other (default TRUE).} \item{legend.fonts.size}{Controls the font size of the legend.} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ @@ -68,13 +68,13 @@ single color for second plot. It is also possible to pass in another clustering names match those of cells). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# get tsne.coords for normalized data +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) -# plot to console -plotByDatasetAndCluster(ligerex) -# return list of plots -plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) -} +ligerex <- louvainCluster(ligerex) +plotByDatasetAndCluster(ligerex, pt.size = 1) } diff --git a/man/plotClusterFactors.Rd b/man/plotClusterFactors.Rd index 6d813bea..2d01a550 100644 --- a/man/plotClusterFactors.Rd +++ b/man/plotClusterFactors.Rd @@ -42,9 +42,12 @@ loadings for a factor, black low. Optionally can also include dendrograms and so factors and clusters. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input -# plot expression for CD4 and return plots -loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex) +plotClusterFactors(ligerex) } diff --git a/man/plotClusterProportions.Rd b/man/plotClusterProportions.Rd index 5fbe344e..1de49c65 100644 --- a/man/plotClusterProportions.Rd +++ b/man/plotClusterProportions.Rd @@ -19,10 +19,12 @@ print plot to console (return.plot = FALSE); ggplot object (return.plot = TRUE) Generates plot of clusters sized by the proportion of total cells } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) ligerex <- quantile_norm(ligerex) -# plot cluster proportions +ligerex <- louvainCluster(ligerex) plotClusterProportions(ligerex) } -} diff --git a/man/plotFactors.Rd b/man/plotFactors.Rd index 621c8858..d62c8a50 100644 --- a/man/plotFactors.Rd +++ b/man/plotFactors.Rd @@ -36,14 +36,15 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -# get tsne.coords for normalized data -ligerex <- runTSNE(ligerex) -# factor plots into pdf file -# pdf("plot_factors.pdf") plotFactors(ligerex) -# dev.off() +ligerex <- runTSNE(ligerex) +plotFactors(ligerex, plot.tsne = TRUE) } } diff --git a/man/plotFeature.Rd b/man/plotFeature.Rd index d9a1dbb0..67529de0 100644 --- a/man/plotFeature.Rd +++ b/man/plotFeature.Rd @@ -72,11 +72,12 @@ Feature can be categorical (factor) or continuous. Can also plot all datasets combined with by.dataset = FALSE. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# get tsne.coords for normalized data +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) -# plot nUMI to console -plotFeature(ligerex, feature = 'nUMI') -} +plotFeature(ligerex, "nUMI", pt.size = 1) } diff --git a/man/plotGene.Rd b/man/plotGene.Rd index 5f382b48..92fc86bd 100644 --- a/man/plotGene.Rd +++ b/man/plotGene.Rd @@ -87,7 +87,7 @@ mismatches in order (default NULL).} \item{keep.scale}{Maintain min/max color scale across all plots when using plot.by (default FALSE)} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ @@ -100,14 +100,12 @@ specified gene. Data can be scaled by dataset or selected feature column from ce all cells). Data plots can be split by feature. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -ligerex +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) -# plot expression for CD4 and return plots -gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -} +plotGene(ligerex, "CD74", pt.size = 1) } diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd index 5ed95105..c456a35d 100644 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -75,7 +75,7 @@ NULL to revert to default gradient scaling. (default 0.1)} \item{verbose}{Print progress bar/messages (TRUE by default)} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ @@ -91,16 +91,14 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex) -# pdf("gene_loadings.pdf") -plotGeneLoadings(ligerex, num.genes = 20) -# dev.off() -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -plotGeneLoadings(ligerex, num.genes = 20) +ligerex <- runTSNE(ligerex) +plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) } - } diff --git a/man/plotGeneViolin.Rd b/man/plotGeneViolin.Rd index 66e59c28..1ca65a6b 100644 --- a/man/plotGeneViolin.Rd +++ b/man/plotGeneViolin.Rd @@ -33,12 +33,13 @@ List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots t Generates violin plots of expression of specified gene for each dataset. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -# plot expression for CD4 and return plots -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex) +plotGeneViolin(ligerex, "CD74", by.dataset = FALSE) +plotGeneViolin(ligerex, "CD74") } diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd index 22fec2d5..e4196139 100644 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -22,12 +22,14 @@ Uses plotGene to plot each gene (and dataset) on a separate page. It is recommen function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) +ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) -# plot expression for CD4 and FCGR3A -# pdf("gene_plots.pdf") -plotGenes(ligerex, c("CD4", "FCGR3A")) -# dev.off() +plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) } } diff --git a/man/plotWordClouds.Rd b/man/plotWordClouds.Rd index f946523a..68992dd7 100644 --- a/man/plotWordClouds.Rd +++ b/man/plotWordClouds.Rd @@ -60,15 +60,14 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) -# pdf('word_clouds.pdf') -plotWordClouds(ligerex, num.genes = 20) -# dev.off() -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -plotWordClouds(ligerex, num.genes = 20) +plotWordClouds(ligerex, do.spec.plot = FALSE) } } diff --git a/man/quantile_norm.Rd b/man/quantile_norm.Rd index 5f86098f..ce060d53 100644 --- a/man/quantile_norm.Rd +++ b/man/quantile_norm.Rd @@ -86,13 +86,10 @@ stretching/compressing datasets' quantiles to better match those of the referenc aligned factor loadings are combined into a single matrix and returned as H.norm. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# do basic quantile alignment +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# higher resolution for more clusters (note that SNF is conserved) -ligerex <- quantile_norm(ligerex, resolution = 1.2) -# change knn_k for more fine-grained local clustering -ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) -} } diff --git a/man/rank_matrix.Rd b/man/rank_matrix.Rd deleted file mode 100644 index 77a78398..00000000 --- a/man/rank_matrix.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{rank_matrix} -\alias{rank_matrix} -\alias{rank_matrix.dgCMatrix} -\alias{rank_matrix.matrix} -\title{rank_matrix} -\usage{ -rank_matrix(X) - -\method{rank_matrix}{dgCMatrix}(X) - -\method{rank_matrix}{matrix}(X) -} -\arguments{ -\item{X}{feature by observation matrix.} -} -\value{ -List with 2 items -} -\description{ -Utility function to rank columns of matrix -} diff --git a/man/read10X.Rd b/man/read10X.Rd index 62561ad6..6e57bc11 100644 --- a/man/read10X.Rd +++ b/man/read10X.Rd @@ -44,7 +44,7 @@ considered scRNA-seq data (default 'rna', alternatives: 'atac').} } \value{ List of merged matrices across data types (returns sparse matrix if only one data type - detected), or nested list of matrices organized by sample if merge=F. + detected), or nested list of matrices organized by sample if merge = FALSE. } \description{ This function generates a sparse matrix (genes x cells) from the data generated by 10X's diff --git a/man/readSubset.Rd b/man/readSubset.Rd index f0462a11..c0c7a4be 100644 --- a/man/readSubset.Rd +++ b/man/readSubset.Rd @@ -45,9 +45,9 @@ This function samples raw/normalized/scaled data from on-disk HDF5 files for plo This function assumes that the cell barcodes are unique across all datasets. } \examples{ -\dontrun{ -# Only for online liger object (based on HDF5 files) -# Example: sample a total amount of 5000 cells from norm.data for downstream analysis -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (length(ligerex@H) > 0) { + # Downsampling is calculated basing on factorization result + ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 100) } } diff --git a/man/removeMissingObs.Rd b/man/removeMissingObs.Rd index 1135dee1..3d4c3a0f 100644 --- a/man/removeMissingObs.Rd +++ b/man/removeMissingObs.Rd @@ -27,8 +27,10 @@ removeMissingObs( Removes cells/genes from chosen slot with no expression in any genes or cells respectively. } \examples{ -\dontrun{ -# liger object: ligerex -ligerex <- removeMissingObs(ligerex) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (any(rowSums(ctrl) == 0) || any(rowSums(stim) == 0)) { + # example datasets do not have missing data, thus put in a condition + # Though the function will return unchanged object if no missing found + ligerex <- removeMissingObs(ligerex) } } diff --git a/man/reorganizeLiger.Rd b/man/reorganizeLiger.Rd index 09174895..1a401137 100644 --- a/man/reorganizeLiger.Rd +++ b/man/reorganizeLiger.Rd @@ -32,10 +32,8 @@ Using the same data, rearrange functional datasets using another discrete featur This removes most computed data slots, though cell.data and current clustering can be retained. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory objects) organized by species -# with column designating sex in cell.data -# rearrange by sex -ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +# Create a random variable of two categories +ligerex@cell.data$foo <- factor(sample(c(1,2), 600, replace = TRUE)) +ligerexFoo <- reorganizeLiger(ligerex, "foo") } diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd index a32e9ea9..b2c5f16c 100644 --- a/man/runGSEA.Rd +++ b/man/runGSEA.Rd @@ -33,10 +33,13 @@ A list of matrices with GSEA analysis for each factor Identify the biological pathways (gene sets from Reactome) that each metagene (factor) might belongs to. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -wilcox.results <- runGSEA(ligerex) -wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +result <- runGSEA(ligerex) } - } diff --git a/man/runTSNE.Rd b/man/runTSNE.Rd index b1590917..8f7150cb 100644 --- a/man/runTSNE.Rd +++ b/man/runTSNE.Rd @@ -54,13 +54,12 @@ FIt-SNE directory as the fitsne.path parameter, though this is only necessary fo to runTSNE. For more detailed FIt-SNE installation instructions, see the liger repo README. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# get tsne.coords for normalized data ligerex <- runTSNE(ligerex) -# get tsne.coords for raw factor loadings -ligerex <- runTSNE(ligerex, use.raw = TRUE) -} } diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index 00087453..e2ddaf30 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -53,13 +53,16 @@ Note that this method requires that the package uwot is installed. It does not d on reticulate or python umap-learn. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings -ligerex <- quantileAlignSNF(ligerex) -# get tsne.coords for normalized data -ligerex <- runUMAP(ligerex) -# get tsne.coords for raw factor loadings -ligerex <- runUMAP(ligerex, use.raw = TRUE) +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { + ligerex <- runUMAP(ligerex) +} } } diff --git a/man/runWilcoxon.Rd b/man/runWilcoxon.Rd index 4dfbbdc2..6e03d2a1 100644 --- a/man/runWilcoxon.Rd +++ b/man/runWilcoxon.Rd @@ -4,7 +4,11 @@ \alias{runWilcoxon} \title{Perform Wilcoxon rank-sum test} \usage{ -runWilcoxon(object, data.use = "all", compare.method) +runWilcoxon( + object, + data.use = "all", + compare.method = c("clusters", "datasets") +) } \arguments{ \item{object}{\code{liger} object.} @@ -20,14 +24,19 @@ A 10-columns data.frame with test results. Perform Wilcoxon rank-sum tests on specified dataset using given method. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") -wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) -# HDF5 input -# ligerex (liger object based on datasets in HDF5 format), factorization complete -# Need to sample cells before implementing Wilcoxon test -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) -de_genes <- runWilcoxon(ligerex, compare.method = "clusters") +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex, resolution = 0.3) +wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") +wilcox.results <- runWilcoxon(ligerex, compare.method = "datasets", data.use = c(1, 2)) +if (length(ligerex@h5file.info) > 0) { + # For HDF5 based object + # Need to sample cells and read into memory before running Wilcoxon test + ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) + wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") } } diff --git a/man/scaleNotCenter.Rd b/man/scaleNotCenter.Rd index 6082c595..20d57ea0 100644 --- a/man/scaleNotCenter.Rd +++ b/man/scaleNotCenter.Rd @@ -26,12 +26,8 @@ positive (NMF only accepts positive values). It also removes cells which do not expression across the genes selected, by default. } \examples{ -\dontrun{ -# Given datasets Y and Z -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) -# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) } -} diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index e63718dd..e29d5f59 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -16,7 +16,7 @@ selectGenes( do.plot = FALSE, cex.use = 0.3, chunk = 1000, - unshared = F, + unshared = FALSE, unshared.datasets = NULL, unshared.thresh = NULL ) @@ -54,12 +54,12 @@ Selected genes are plotted in green. (default FALSE)} \item{chunk}{size of chunks in hdf5 file. (default 1000)} +\item{unshared}{Whether to consider unshared features (Default FALSE)} + \item{unshared.datasets}{A list of the datasets to consider unshared features for, i.e. list(2), to use the second dataset} \item{unshared.thresh}{A list of threshold values to apply to each unshared dataset. If only one value is provided, it will apply to all unshared datasets. If a list is provided, it must match the length of the unshared datasets submitted.} - -\item{unshared.features}{Whether to consider unshared features} } \value{ \code{liger} object with var.genes slot set. @@ -73,13 +73,7 @@ It also provides a log plot of gene variance vs gene expression (with a line ind expression across genes and cells). Selected genes are plotted in green. } \examples{ -\dontrun{ -# Given datasets Y and Z -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) -# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) -# select a smaller subset of genes -ligerex <- selectGenes(ligerex, var.thresh = 0.3) -} } diff --git a/man/seuratToLiger.Rd b/man/seuratToLiger.Rd index 39a2e071..250e6b17 100644 --- a/man/seuratToLiger.Rd +++ b/man/seuratToLiger.Rd @@ -76,17 +76,10 @@ identities from the original Seurat objects. Seurat V2 and V3 supported (though should share the same major version). } \examples{ -\dontrun{ -# Seurat objects for two pbmc datasets -tenx <- readRDS('tenx.RDS') -seqwell <- readRDS('seqwell.RDS') -# create liger object, using project names -ligerex <- seuratToLiger(list(tenx, seqwell)) -# create liger object, passing in names explicitly, using hvg.info genes -ligerex2 <- seuratToLiger(list(tenx, seqwell), names = c('tenx', 'seqwell'), num.hvg.info = 2000) -# Seurat object for joint analysis -pbmc <- readRDS('pbmc.RDS') -# create liger object, using 'protocol' for dataset names -ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { + ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") + stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") + ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), + use.seurat.genes = FALSE) } } diff --git a/man/show-methods.Rd b/man/show-methods.Rd index 55da3d41..ff8cdfac 100644 --- a/man/show-methods.Rd +++ b/man/show-methods.Rd @@ -14,3 +14,7 @@ \description{ show method for liger } +\examples{ +ligerex <- createLiger(list(ctrl = ctrl)) +show(ligerex) +} diff --git a/man/subsetLiger.Rd b/man/subsetLiger.Rd index 98c98480..cacf47cb 100644 --- a/man/subsetLiger.Rd +++ b/man/subsetLiger.Rd @@ -31,10 +31,6 @@ raw.data, norm.data, scale.data, cell.data, H, W, V, H.norm, tsne.coords, and cl Note that it does NOT reoptimize the factorization. See optimizeSubset for this functionality. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), with clusters 0:10 -# factorization, alignment, and t-SNE calculation have been performed -# subset by clusters -ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +lig.small <- subsetLiger(ligerex, cells.use = c(colnames(ctrl)[1:100], colnames(stim)[1:100])) } diff --git a/man/suggestK.Rd b/man/suggestK.Rd index 09b2a1e9..8398f988 100644 --- a/man/suggestK.Rd +++ b/man/suggestK.Rd @@ -66,9 +66,11 @@ are not uniformly distributed when an appropriate number of factors is reached. Depending on number of cores used, this process can take 10-20 minutes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# examine plot for most appropriate k, use multiple cores for faster results -suggestK(ligerex, num.cores = 4) +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +suggestK(ligerex, k.test = c(5,6), max.iters = 1) } } diff --git a/man/suggestLambda.Rd b/man/suggestLambda.Rd index f055850e..8620a779 100644 --- a/man/suggestLambda.Rd +++ b/man/suggestLambda.Rd @@ -73,9 +73,11 @@ likely also correspond to slower decrease in agreement. Depending on number of c this process can take 10-20 minutes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# examine plot for most appropriate lambda, use multiple cores for faster results -suggestLambda(ligerex, k = 20, num.cores = 4) +\donttest{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) } } diff --git a/man/sumGroups.Rd b/man/sumGroups.Rd deleted file mode 100644 index 5572a97b..00000000 --- a/man/sumGroups.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{sumGroups} -\alias{sumGroups} -\alias{sumGroups.dgCMatrix} -\alias{sumGroups.matrix} -\title{sumGroups} -\usage{ -sumGroups(X, y, MARGIN = 2) - -\method{sumGroups}{dgCMatrix}(X, y, MARGIN = 2) - -\method{sumGroups}{matrix}(X, y, MARGIN = 2) -} -\arguments{ -\item{X}{matrix} - -\item{y}{group labels} - -\item{MARGIN}{whether observations are rows (=2) or columns (=1)} -} -\value{ -Matrix of groups by features -} -\description{ -Utility function to sum over group labels -} diff --git a/src/Makevars b/src/Makevars index 5485e26a..cd3a3c52 100755 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,5 @@ -## With R 3.1.0 or later, you can uncomment the following line to tell R to +## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (where available) ## ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider @@ -9,11 +9,9 @@ ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP ## support within Armadillo prefers / requires it ## -## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 +## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 -CXX_STD = CXX11 - -PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP -PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 +PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP +PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/Makevars.win b/src/Makevars.win index e7c87ad5..405fadd3 100755 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,5 @@ -## With R 3.1.0 or later, you can uncomment the following line to tell R to +## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (where available) ## ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider @@ -9,10 +9,8 @@ ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP ## support within Armadillo prefers / requires it ## -## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 +## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 -CXX_STD = CXX11 - -PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP -PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ No newline at end of file +PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP +PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 +PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index f51c32b4..6c8630b1 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -8,6 +8,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // RunModularityClusteringCpp IntegerVector RunModularityClusteringCpp(Eigen::SparseMatrix SNN, int modularityFunction, double resolution, int algorithm, int nRandomStarts, int nIterations, int randomSeed, bool printOutput, std::string edgefilename); RcppExport SEXP _rliger_RunModularityClusteringCpp(SEXP SNNSEXP, SEXP modularityFunctionSEXP, SEXP resolutionSEXP, SEXP algorithmSEXP, SEXP nRandomStartsSEXP, SEXP nIterationsSEXP, SEXP randomSeedSEXP, SEXP printOutputSEXP, SEXP edgefilenameSEXP) {