From 3208082f74193a670d3f5ab355acc8520e746d34 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Thu, 2 Nov 2023 12:54:24 -0400 Subject: [PATCH 1/8] Deprecate riverplot; fix bugs; fix check warnings; change maintainer --- DESCRIPTION | 11 +- NAMESPACE | 2 - R/rliger.R | 927 +++++++++++++++++---------------- man/liger-class.Rd | 10 +- man/linkGenesAndPeaks.Rd | 2 +- man/makeInteractTrack.Rd | 2 +- man/makeRiverplot.Rd | 2 +- man/online_iNMF.Rd | 2 +- man/optimizeALS.Rd | 5 +- man/optimize_UANLS.Rd | 45 -- man/plotByDatasetAndCluster.Rd | 2 +- man/plotGene.Rd | 2 +- man/plotGeneLoadings.Rd | 2 +- man/readSubset.Rd | 2 +- man/selectGenes.Rd | 4 +- src/Makevars | 10 +- src/Makevars.win | 12 +- src/RcppExports.cpp | 5 + 18 files changed, 503 insertions(+), 544 deletions(-) delete mode 100644 man/optimize_UANLS.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 488547e3..233cfe94 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,26 +1,28 @@ Package: rliger -Version: 1.0.0 +Version: 1.0.1 Date: 2021-03-09 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. Authors@R: c( person(given = 'Joshua', family = 'Welch', email = 'welchjd@umich.edu', role = c('aut', 'ctb')), - person(given = 'Chao', family = 'Gao', email = 'gchao@umich.edu', role = c('aut', 'ctb', 'cre')), + person(given = 'Chao', family = 'Gao', email = 'gchao@umich.edu', role = c('aut', 'ctb')), person(given = 'Jialin', family = 'Liu', email = 'alanliu@umich.edu', role = c('aut', 'ctb')), person(given = 'Joshua', family = 'Sodicoff', email = 'sodicoff@umich.edu', role = c('aut', 'ctb')), person(given = 'Velina', family = 'Kozareva', role = c('aut', 'ctb')), person(given = 'Evan', family = 'Macosko', role = c('aut', 'ctb')), + person(given = 'Yichen', family = 'Wang', email = 'wayichen@umich.edu', role = c('aut', 'ctb', 'cre')), person(given = 'Paul', family = 'Hoffman', role = 'ctb'), person(given = 'Ilya', family = 'Korsunsky', role = 'ctb'), person(given = 'Robert', family = 'Lee', role = 'ctb') ) Author: Joshua Welch [aut, ctb], - Chao Gao [aut, ctb, cre], + Chao Gao [aut, ctb], Jialin Liu [aut, ctb], Joshua Sodicoff [aut, ctb], Velina Kozareva [aut, ctb], Evan Macosko [aut, ctb], + Yichen Wang [aut, ctb, cre], Paul Hoffman [ctb], Ilya Korsunsky [ctb], Robert Lee [ctb] @@ -38,7 +40,6 @@ Imports: Rcpp (>= 0.12.10), ica, Rtsne, ggplot2, - riverplot, foreach, parallel, doParallel, @@ -59,7 +60,7 @@ Depends: Matrix, methods, patchwork -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: Seurat, diff --git a/NAMESPACE b/NAMESPACE index 395b0972..f7dd6838 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,8 +144,6 @@ importFrom(methods,slot) importFrom(methods,slotNames) importFrom(plyr,mapvalues) importFrom(plyr,rbind.fill.matrix) -importFrom(riverplot,makeRiver) -importFrom(riverplot,riverplot) importFrom(rlang,.data) importFrom(scattermore,geom_scattermore) importFrom(stats,approxfun) diff --git a/R/rliger.R b/R/rliger.R index 81570570..292c9356 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -15,7 +15,7 @@ NULL #' @slot norm.data List of normalized matrices (genes by cells) #' @slot scale.data List of scaled matrices (cells by genes) #' @slot sample.data List of sampled matrices (gene by cells) -#' #' @slot scale.unshared.data List of scaled matrices of unshared features +#' @slot scale.unshared.data List of scaled matrices of unshared features #' @slot h5file.info List of HDF5-related information for each input dataset. Paths to raw data, indices, #' indptr, barcodes, genes and the pipeline through which the HDF5 file is formated (10X, AnnData, etc), #' type of sampled data (raw, normalized or scaled). @@ -23,7 +23,7 @@ NULL #' cells across all datasets) #' @slot var.genes Subset of informative genes shared across datasets to be used in matrix #' factorization -#' #' @slot var.unshared.features Highly variable unshared features selected from each dataset +#' @slot var.unshared.features Highly variable unshared features selected from each dataset #' @slot H Cell loading factors (one matrix per dataset, dimensions cells by k) #' @slot H.norm Normalized cell loading factors (cells across all datasets combined into single #' matrix) @@ -150,7 +150,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m use.filtered = FALSE, reference = NULL, data.type = "rna", verbose = TRUE) { datalist <- list() datatypes <- c("Gene Expression") - + if (length(num.cells) == 1) { num.cells <- rep(num.cells, length(sample.dirs)) } @@ -193,20 +193,20 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m } matrix.file <- paste0(sample.dir, "/matrix.mtx", suffix) barcodes.file <- paste0(sample.dir, "/barcodes.tsv", suffix) - + rawdata <- readMM(matrix.file) # convert to dgc matrix if (class(rawdata)[1] == "dgTMatrix") { rawdata <- as(rawdata, "CsparseMatrix") } - + # filter for UMIs first to increase speed umi.pass <- which(colSums(rawdata) > min.umis) if (length(umi.pass) == 0) { message("No cells pass UMI cutoff. Please lower it.") } rawdata <- rawdata[, umi.pass, drop = FALSE] - + barcodes <- readLines(barcodes.file)[umi.pass] # Remove -1 tag from barcodes if (all(grepl(barcodes, pattern = "\\-1$"))) { @@ -224,7 +224,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m } # since some genes are only differentiated by ENSMBL colnames(rawdata) <- barcodes - + # split based on 10X datatype -- V3 has Gene Expression, Antibody Capture, CRISPR, CUSTOM # V2 has only Gene Expression by default and just two columns if (is.null(ncol(features))) { @@ -243,7 +243,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m }) names(samplelist) <- sam.datatypes.unique } - + # num.cells filter only for gene expression data if (!is.null(num.cells)) { if (names(samplelist) == "Gene Expression" | names(samplelist) == "Chromatin Accessibility") { @@ -260,7 +260,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m samplelist[[data_label]] <- samplelist[[data_label]][, order(cs, decreasing = TRUE) [1:num.cells[i]]] } - + # cs <- colSums(samplelist[["Gene Expression"]]) # limit <- ncol(samplelist[["Gene Expression"]]) # if (num.cells[i] > limit) { @@ -273,12 +273,12 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m # samplelist[["Gene Expression"]] <- samplelist[["Gene Expression"]][, order(cs, decreasing = TRUE) # [1:num.cells[i]]] } - + datalist[[i]] <- samplelist } if (merge) { if (verbose) { - message("Merging samples") + message("Merging samples") } return_dges <- lapply(datatypes, function(x) { mergelist <- lapply(datalist, function(d) { @@ -289,7 +289,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m MergeSparseDataAll(mergelist, sample.names) }) names(return_dges) <- datatypes - + # if only one type of data present if (length(return_dges) == 1) { if (verbose){ @@ -363,7 +363,7 @@ mergeH5 <- function(file.list, indptr = h5file[["raw.X/indptr"]][] barcodes = paste0(library.names[i], "_", h5file[["obs"]][]$cell) genes = h5file[["raw.var"]][]$index - + } else { data = h5file[[data.name]][] indices = h5file[[indices.name]][] @@ -371,7 +371,7 @@ mergeH5 <- function(file.list, barcodes = paste0(library.names[i], "_", h5file[[barcodes.name]][]) genes = h5file[[genes.name]][] } - + if (i != 1) indptr = indptr[2:length(indptr)] num_data = length(data) num_indptr = length(indptr) @@ -423,7 +423,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { if (is.null(file.path) & is.null(object@h5file.info[[1]][["file.path"]])) { # file path is not provided by file.path param or liger object stop('File path information is not stored in the liger object. Please provide a list of file paths through file.path parameter.') } - + if (!is.null(file.path)) { # if new file path is provided, update liger object h5file.info for (i in 1:length(object@h5file.info)) { object@h5file.info[[i]][["file.path"]] = file.path[[i]] @@ -433,7 +433,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { object@raw.data = lapply(object@h5file.info, function(x) hdf5r::H5File$new(x[["file.path"]], mode="r+")) object@norm.data = lapply(object@raw.data, function(x) x[["norm.data"]]) object@scale.data = lapply(object@raw.data, function(x) x[["scale.data"]]) - + for (i in 1:length(object@raw.data)){ if (object@h5file.info[[i]][["format.type"]] == "10X"){ barcodes.name = "matrix/barcodes" @@ -443,7 +443,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { indices.name = "matrix/indices" indptr.name = "matrix/indptr" genes.name = "matrix/features/name" - } else if (format.type.list[i] == "AnnData"){ + } else if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ barcodes.name = "obs" barcodes = object@raw.data[[i]][[barcodes.name]][]$cell num_cells = length(object@raw.data[[i]][[barcodes.name]][]$cell) @@ -489,7 +489,7 @@ restoreOnlineLiger <- function(object, file.path = NULL) { #' @param verbose Print messages (TRUE by default) #' #' @return \code{liger} object with raw.data slot set. -#' +#' #' @import Matrix #' @import hdf5r #' @@ -517,6 +517,7 @@ createLiger <- function(raw.data, object@V = rep(list(NULL), length(raw.data)) object@H = rep(list(NULL), length(raw.data)) cell.data = list() + format.type.list = format.type if (length(format.type) == 1) format.type.list = rep(format.type, length(raw.data)) for (i in 1:length(raw.data)){ file.h5 = hdf5r::H5File$new(raw.data[[i]], mode="r+") @@ -556,12 +557,12 @@ createLiger <- function(raw.data, object@norm.data[[i]] = file.h5[["norm.data"]] names(object@norm.data)[[i]] = names(object@raw.data)[[i]] } - + if (file.h5$exists("scale.data")){ object@scale.data[[i]] = file.h5[["scale.data"]] names(object@scale.data)[[i]] = names(object@raw.data)[[i]] } - + if (file.h5$exists("cell.data")){ cell.data[[i]] = data.frame(dataset = file.h5[["cell.data"]][]$dataset, nUMI = file.h5[["cell.data"]][]$nUMI, @@ -580,7 +581,7 @@ createLiger <- function(raw.data, names(object@H) <- names(object@V) <- names(object@h5file.info) <- names(object@raw.data) return(object) } - + raw.data <- lapply(raw.data, function(x) { if (class(x)[1] == "dgTMatrix" | class(x)[1] == 'dgCMatrix') { mat <- as(x, 'CsparseMatrix') @@ -594,7 +595,7 @@ createLiger <- function(raw.data, as(as.matrix(x), 'CsparseMatrix') } }) - + if (length(Reduce(intersect, lapply(raw.data, colnames))) > 0 & length(raw.data) > 1) { stop('At least one cell name is repeated across datasets; please make sure all cell names are unique.') @@ -633,7 +634,7 @@ createLiger <- function(raw.data, object <- removeMissingObs(object, use.cols = FALSE, verbose = verbose) } } - + # Initialize cell.data for object with nUMI, nGene, and dataset nUMI <- unlist(lapply(object@raw.data, function(x) { colSums(x) @@ -648,7 +649,7 @@ createLiger <- function(raw.data, rownames(object@cell.data) <- unlist(lapply(object@raw.data, function(x) { colnames(x) }), use.names = FALSE) - + return(object) } @@ -713,21 +714,21 @@ normalize <- function(object, num_entries = object@h5file.info[[i]][["data"]]$dims num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = object@h5file.info[[i]][["genes"]]$dims - - + + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_sum_sq = rep(0,num_genes) gene_means = rep(0,num_genes) #file.h5$close_all() - + safe_h5_create(object = object, idx = i, dataset_name = "/norm.data", dims = num_entries, mode = h5types$double, chunk_size = chunk_size) safe_h5_create(object = object, idx = i, dataset_name = "/cell_sums", dims = num_cells, mode = h5types$int, chunk_size = chunk_size) - + #file.h5 = H5File$new(fname, mode="r+") num_chunks = ceiling(num_cells/chunk_size) - if (verbose) { + if (verbose) { pb = txtProgressBar(0,num_chunks,style = 3) } ind = 0 @@ -752,7 +753,7 @@ normalize <- function(object, prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + length(norm.data@x) prev_end_ind = tail(start_inds, 1) - + # calculate row sum and sum of squares using normalized data row_sums = Matrix::rowSums(norm.data) gene_sum_sq = gene_sum_sq + rowSums(norm.data*norm.data) @@ -776,7 +777,7 @@ normalize <- function(object, } object@cell.data$nUMI = nUMI object@cell.data$nGene = nGene - + for (i in 1:length(object@raw.data)){ if (!object@raw.data[[i]]$exists("cell.data")) { cell.data.i = object@cell.data[object@cell.data$dataset == names(object@raw.data)[i], ] @@ -784,7 +785,7 @@ normalize <- function(object, object@raw.data[[i]][["cell.data"]] = cell.data.i } } - + names(object@norm.data) = names(object@raw.data) } else { if (remove.missing) { @@ -812,7 +813,7 @@ normalize <- function(object, #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with scale.data slot set. -#' +#' #' @import hdf5r calcGeneVars = function (object, chunk = 1000, verbose = TRUE) @@ -826,14 +827,14 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = object@h5file.info[[i]][["genes"]]$dims num_entries = object@h5file.info[[i]][["data"]]$dims - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_vars = rep(0,num_genes) gene_means = object@raw.data[[i]][["gene_means"]][] gene_num_pos = rep(0,num_genes) - + num_chunks = ceiling(num_cells/chunk_size) if (verbose) { pb = txtProgressBar(0, num_chunks, style = 3) @@ -848,7 +849,7 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) row_inds = object@h5file.info[[i]][["indices"]][(prev_end_ind+1):(tail(start_inds, 1))] counts = object@norm.data[[i]][(prev_end_ind+1):(tail(start_inds, 1))] norm.data = sparseMatrix(i=row_inds[1:length(counts)]+1,p=start_inds[1:(chunk_size+1)]-prev_end_ind,x=counts,dims=c(num_genes,chunk_size)) - + num_read = length(counts) prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + num_read @@ -856,7 +857,7 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) gene_vars = gene_vars + sumSquaredDeviations(norm.data,gene_means) if (verbose) { setTxtProgressBar(pb, ind) - } + } } if (verbose) { setTxtProgressBar(pb, num_chunks) @@ -899,12 +900,12 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) #' Selected genes are plotted in green. (default FALSE) #' @param cex.use Point size for plot. #' @param chunk size of chunks in hdf5 file. (default 1000) -#' @param unshared.features Whether to consider unshared features +#' @param unshared Whether to consider unshared features (Default FALSE) #' @param unshared.datasets A list of the datasets to consider unshared features for, i.e. list(2), to use the second dataset #' @param unshared.thresh A list of threshold values to apply to each unshared dataset. If only one value is provided, it will apply to all unshared #' datasets. If a list is provided, it must match the length of the unshared datasets submitted. #' @return \code{liger} object with var.genes slot set. -#' +#' #' @import hdf5r #' @importFrom stats optimize #' @importFrom graphics abline plot points title @@ -941,14 +942,14 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } else { genes = object@h5file.info[[i]][["genes"]][] } - + if (capitalize) { genes = toupper(genes) } trx_per_cell = object@raw.data[[i]][["cell_sums"]][] gene_expr_mean = object@raw.data[[i]][["gene_means"]][] gene_expr_var = object@raw.data[[i]][["gene_vars"]][] - + names(gene_expr_mean) <- names(gene_expr_var) <- genes # assign gene names nolan_constant <- mean((1/trx_per_cell)) alphathresh.corrected <- alpha.thresh/length(genes) @@ -978,7 +979,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genes.use <- intersect(genes.use, genes.new) } } - + for (i in 1:length(hdf5_files)) { if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ genes = object@h5file.info[[i]][["genes"]][]$index @@ -987,7 +988,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } genes.use <- genes.use[genes.use %in% genes] } - + if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", immediate. = TRUE) @@ -1021,7 +1022,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genemeanupper <- gene_expr_mean + qnorm(1 - alphathresh.corrected / 2) * sqrt(gene_expr_mean * nolan_constant / ncol(object@raw.data[[i]])) basegenelower <- log10(gene_expr_mean * nolan_constant) - + num_varGenes <- function(x, num.genes.des){ # This function returns the difference between the desired number of genes and # the number actually obtained when thresholded on x @@ -1029,7 +1030,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes log10(gene_expr_var) > basegenelower + x)) return(abs(num.genes.des - y)) } - + if (!is.null(num.genes)) { # Optimize to find value of x which gives the desired number of genes for this dataset # if very small number of genes requested, var.thresh may need to exceed 1 @@ -1041,19 +1042,19 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes optimized$objective, ". Lower tol or alpha.thresh for better results.")) } } - + genes.new <- names(gene_expr_var)[which(gene_expr_var / nolan_constant > genemeanupper & log10(gene_expr_var) > basegenelower + var.thresh[i])] - + if (do.plot) { graphics::plot(log10(gene_expr_mean), log10(gene_expr_var), cex = cex.use, xlab='Gene Expression Mean (log10)', ylab='Gene Expression Variance (log10)') - + graphics::points(log10(gene_expr_mean[genes.new]), log10(gene_expr_var[genes.new]), cex = cex.use, col = "green") graphics::abline(log10(nolan_constant), 1, col = "purple") - + legend("bottomright", paste0("Selected genes: ", length(genes.new)), pch = 20, col = "green") graphics::title(main = names(object@raw.data)[i]) } @@ -1067,11 +1068,11 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes genes.use <- intersect(genes.use, genes.new) } } - + for (i in 1:length(object@raw.data)) { genes.use <- genes.use[genes.use %in% rownames(object@raw.data[[i]])] } - + if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", immediate. = TRUE) @@ -1095,17 +1096,17 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } } unshared.feats <- c() - + for (i in 1:length(object@raw.data)){ unshared.feats[i] <- list(NULL) } - + #construct a list of shared features shared_names = rownames(object@raw.data[[1]]) for (matrix in 2:length(object@raw.data)){ shared_names = subset(shared_names, shared_names %in% rownames(object@raw.data[[i]])) } - + for (i in unshared.datasets){ unshared.use <- c() #Provides normalized subset of unshared features @@ -1155,7 +1156,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with scale.data slot set. -#' +#' #' @import hdf5r #' #' @export @@ -1178,7 +1179,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose message(hdf5_files[i]) } chunk_size = chunk - + if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ genes = object@raw.data[[i]][["raw.var"]][]$index } else { @@ -1187,14 +1188,14 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = length(genes) num_entries = object@h5file.info[[i]][["data"]]$dims - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 gene_vars = rep(0,num_genes) gene_means = object@raw.data[[i]][["gene_means"]][1:num_genes] gene_sum_sq = object@raw.data[[i]][["gene_sum_sq"]][1:num_genes] - + gene_inds = which(genes %in% vargenes) gene_root_mean_sum_sq = sqrt(gene_sum_sq/(num_cells-1)) safe_h5_create(object = object, idx = i, dataset_name = "scale.data", dims = c(length(vargenes), num_cells), mode = h5types$double, chunk_size = c(length(vargenes), chunk_size)) @@ -1226,7 +1227,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose prev_end_data = prev_end_data + num_read prev_end_ind = tail(start_inds, 1) if (verbose) { - setTxtProgressBar(pb, ind) + setTxtProgressBar(pb, ind) } } object@scale.data[[i]] = object@raw.data[[i]][["scale.data"]] @@ -1238,13 +1239,13 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose names(object@scale.data) <- names(object@raw.data) } else { object@scale.data <- lapply(1:length(object@norm.data), function(i) { - scaleNotCenterFast(t(object@norm.data[[i]][object@var.genes, ])) + scaleNotCenterFast(t(object@norm.data[[i]][object@var.genes, , drop = FALSE])) }) # TODO: Preserve sparseness later on (convert inside optimizeALS) object@scale.data <- lapply(object@scale.data, function(x) { as.matrix(x) }) - + names(object@scale.data) <- names(object@norm.data) for (i in 1:length(object@scale.data)) { object@scale.data[[i]][is.na(object@scale.data[[i]])] <- 0 @@ -1257,9 +1258,9 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose if (length(object@var.unshared.features) != 0){ for (i in 1:length(object@raw.data)){ if (!is.null(object@var.unshared.features[[i]])){ - if (class(object@raw.data[[i]])[1] == "dgTMatrix" | + if (class(object@raw.data[[i]])[1] == "dgTMatrix" || class(object@raw.data[[i]])[1] == "dgCMatrix") { - object@scale.unshared.data[[i]] <- scaleNotCenterFast(t(object@norm.data[[i]][object@var.unshared.features[[i]], ])) + object@scale.unshared.data[[i]] <- scaleNotCenterFast(t(object@norm.data[[i]][object@var.unshared.features[[i]],])) object@scale.unshared.data[[i]] <- as.matrix(object@scale.unshared.data[[i]]) } else { object@scale.unshared.data[[i]] <- scale(t(object@norm.data[[i]][object@var.unshared.features[[i]], ]), center = F, scale = T) @@ -1297,7 +1298,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose removeMissingObs <- function(object, slot.use = "raw.data", use.cols = TRUE, verbose = TRUE) { filter.data <- slot(object, slot.use) - removed <- ifelse(((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE)) | + removed <- ifelse(((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE)) | ((slot.use == "scale.data") & (use.cols == FALSE)) , yes = "cells", no = "genes") expressed <- ifelse(removed == "cells", yes = " any genes", no = "") @@ -1424,14 +1425,14 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with sample.data slot set. -#' +#' #' @import hdf5r #' #' @export #' @examples #' \dontrun{ #' # Only for online liger object (based on HDF5 files) -#' # Example: sample a total amount of 5000 cells from norm.data for downstream analysis +#' # Example: sample a total amount of 5000 cells from norm.data for downstream analysis #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) #' } @@ -1453,10 +1454,10 @@ readSubset <- function(object, datasets.use=names(object@H) } cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, seed = rand.seed, verbose = verbose) - + hdf5_files = names(object@raw.data) #vargenes = object@var.genes - + # find the intersect of genes from each input datasets genes = c() if (slot.use != "scale.data"){ @@ -1471,12 +1472,12 @@ readSubset <- function(object, } else { genes = object@var.genes } - + if(is.null(genes.use)) { genes.use = genes } - + for (i in 1:length(hdf5_files)) { if (verbose) { message(hdf5_files[i]) @@ -1496,20 +1497,20 @@ readSubset <- function(object, } num_cells = length(barcodes) num_genes = length(genes) - + prev_end_col = 1 prev_end_data = 1 prev_end_ind = 0 - - + + #gene_inds = which(genes %in% vargenes) - + num_chunks = ceiling(num_cells/chunk_size) if (verbose) { pb = txtProgressBar(0, num_chunks, style = 3) } ind = 0 - + while (prev_end_col < num_cells) { ind = ind + 1 if (num_cells - prev_end_col < chunk_size) { @@ -1532,7 +1533,7 @@ readSubset <- function(object, use_these = intersect(colnames(one_chunk),cell_inds[[i]]) one_chunk = one_chunk[genes.use,use_these] data.subset = cbind(data.subset,one_chunk) - + num_read = length(counts) prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + num_read @@ -1545,7 +1546,7 @@ readSubset <- function(object, use_these = intersect(colnames(one_chunk),cell_inds[[i]]) one_chunk = one_chunk[genes.use,use_these] data.subset = cbind(data.subset,one_chunk) - + prev_end_col = prev_end_col + chunk_size if (verbose) { setTxtProgressBar(pb, ind) @@ -1572,7 +1573,7 @@ readSubset <- function(object, datasets.use = names(object@H) } cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, verbose = verbose) - + files = names(object@raw.data) # find the intersect of genes from each input datasets genes = c() @@ -1663,7 +1664,7 @@ readSubset <- function(object, #' @examples #' \dontrun{ #' # Requires preprocessed liger object -#' # Get factorization using 20 factors and mini-batch of 5000 cells +#' # Get factorization using 20 factors and mini-batch of 5000 cells #' # (default setting, can be adjusted for ideal results) #' ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) #' } @@ -1691,7 +1692,7 @@ online_iNMF <- function(object, scale.data_prev = object@scale.data cell.data_prev = object@cell.data names(raw.data_prev) = names(object@raw.data) - + # assuming only one new dataset arrives at a time raw.data = c() norm.data = c() @@ -1710,7 +1711,7 @@ online_iNMF <- function(object, object@h5file.info = h5file.info object@scale.data = scale.data object@cell.data = cell.data - + # check whether X_new needs to be processed for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] == "H5File"){ @@ -1718,7 +1719,7 @@ online_iNMF <- function(object, } else { processed = !is.null(X_new[[i]]@scale.data) } - + if (processed) { if (verbose) { cat("New dataset", i, "already preprocessed.", "\n") @@ -1734,8 +1735,8 @@ online_iNMF <- function(object, } } } - - + + object@raw.data = c(raw.data_prev, object@raw.data) object@norm.data = c(norm.data_prev, object@norm.data) object@h5file.info = c(h5file.info_prev, object@h5file.info) @@ -1746,11 +1747,11 @@ online_iNMF <- function(object, object@V = lapply(object@V, t) object@H = lapply(object@H, t) } - + for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] != "H5File") object@scale.data[[i]] = t(object@scale.data[[i]]) } - + ## extract required information and initialize algorithm num_files = length(object@raw.data) # number of total input hdf5 files num_prev_files = 0 # number of input hdf5 files processed in last step @@ -1765,16 +1766,16 @@ online_iNMF <- function(object, cat(num_new_files, "new datasets detected.", "\n") } } - + file_idx = 1:num_files # indices for all input files file_idx_new = (num_prev_files+1):num_files # indices only for new input files file_idx_prev = setdiff(file_idx,file_idx_new) - + vargenes = object@var.genes file_names = names(object@raw.data) gene_names = vargenes # genes selected for analysis num_genes = length(vargenes) # number of the selected genes - + cell_barcodes = list() # cell barcodes for each dataset for (i in file_idx){ cell_barcodes[[i]] = rownames(object@cell.data)[object@cell.data$dataset == file_names[i]] @@ -1782,7 +1783,7 @@ online_iNMF <- function(object, num_cells = unlist(lapply(cell_barcodes, length)) # number of cells in each dataset num_cells_new = num_cells[(num_prev_files+1):num_files] minibatch_sizes = rep(0, num_files) - + for (i in file_idx_new) { minibatch_sizes[i] = round((num_cells[i]/sum(num_cells[file_idx_new])) * miniBatch_size) if (minibatch_sizes[i] > num_cells[i]){ @@ -1791,13 +1792,13 @@ online_iNMF <- function(object, } } minibatch_sizes_orig = minibatch_sizes - + if (!projection) { - + if(!is.null(seed)){ set.seed(seed) } - + # W matrix initialization if (is.null(X_new)) { object@W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) @@ -1817,7 +1818,7 @@ online_iNMF <- function(object, # nrow = num_genes, # ncol = k) } - + # normalize the columns of H_i, H_s matrices for (j in 1:k){ for (i in file_idx){ # normalize columns of dictionaries @@ -1847,13 +1848,13 @@ online_iNMF <- function(object, # A = HiHi^t, B = XiHit A_old = list() B_old = list() - + if (is.null(X_new)) { object@A = rep(list(matrix(0, k, k)), num_new_files) object@B = rep(list(matrix(0, num_genes, k)), num_new_files) A_old = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs B_old = rep(list(matrix(0, num_genes, k)), num_new_files) # save information older than 2 epochs - + } else { object@A[file_idx_prev] = if(!is.null(A.init)) A.init else object@A object@B[file_idx_prev] = if(!is.null(B.init)) B.init else object@B @@ -1862,21 +1863,21 @@ online_iNMF <- function(object, object@A[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) object@B[(num_prev_files+1):num_files] = rep(list(matrix(0, num_genes, k)), num_new_files) A_old[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs - B_old[(num_prev_files+1):num_files] = rep(list(matrix(0, k, k)), num_new_files) # save information older than 2 epochs + B_old[(num_prev_files+1):num_files] = rep(list(matrix(0, num_genes, k)), num_new_files) # save information older than 2 epochs } - + iter = 1 epoch = rep(0, num_files) # intialize the number of epoch for each dataset epoch_prev = rep(0, num_files) # intialize the previous number of epoch for each dataset epoch_next = rep(FALSE, num_files) sqrt_lambda = sqrt(lambda) total_time = 0 # track the total amount of time used for the online learning - - + + num_chunks = rep(NULL, num_files) chunk_idx = rep(list(NULL), num_files) all_idx = rep(list(NULL), num_files) - + # chunk permutation for (i in file_idx_new){ num_chunks[i] = ceiling(num_cells[i]/h5_chunk_size) @@ -1887,7 +1888,7 @@ online_iNMF <- function(object, } else { all_idx[[i]] = (1+h5_chunk_size*(chunk_idx[[i]][1]-1)):(num_cells[i]) } - + for (j in chunk_idx[[i]][-1]){ if (j != num_chunks[i]){ all_idx[[i]] = c(all_idx[[i]],(1+h5_chunk_size*(j-1)):(j*h5_chunk_size)) @@ -1896,13 +1897,13 @@ online_iNMF <- function(object, } } } - + total.iters = floor(sum(num_cells_new) * max.epochs / miniBatch_size) if (verbose) { cat("Starting Online iNMF...", "\n") pb <- txtProgressBar(min = 1, max = total.iters+1, style = 3) - } - + } + while(epoch[file_idx_new[1]] < max.epochs) { # track epochs minibatch_idx = rep(list(NULL), num_files) # indices of samples in each dataest used for this iteration @@ -1925,11 +1926,11 @@ online_iNMF <- function(object, } all_idx[[i]] = all_idx[[i]][-1] # remove the first element 0 minibatch_idx[[i]] = c(minibatch_idx[[i]],all_idx[[i]][1:((iter * minibatch_sizes[i]) %% num_cells[i])]) - + } else if ((epoch_prev[i] != epoch[i]) & ((iter * minibatch_sizes[i]) %% num_cells[i] == 0)){ # if current iter finishes this cycle without start a a new cycle epoch_next[i] = TRUE epoch_prev[i] = epoch[i] - + minibatch_idx[[i]] = all_idx[[i]][((((iter-1) * minibatch_sizes[i]) %% num_cells[i]) + 1):num_cells[i]] chunk_idx[[i]] = sample(1:num_chunks[i],num_chunks[i]) all_idx[[i]] = 0 @@ -1952,21 +1953,21 @@ online_iNMF <- function(object, } epoch[file_idx_new[1]] = max.epochs # last epoch } - - + + if (length(minibatch_idx[[file_idx_new[1]]]) == minibatch_sizes_orig[file_idx_new[1]]){ X_minibatch = rep(list(NULL), num_files) for (i in file_idx_new){ X_minibatch[[i]] = object@scale.data[[i]][1:num_genes ,minibatch_idx[[i]]] } - + # update H_i by ANLS Hi_minibatch[[i]] H_minibatch = rep(list(NULL), num_files) for (i in file_idx_new){ H_minibatch[[i]] = solveNNLS(rbind(object@W + object@V[[i]], sqrt_lambda * object@V[[i]]), rbind(X_minibatch[[i]], matrix(0, num_genes, minibatch_sizes[i]))) } - + # updata A and B matrices if (iter == 1){ scale_param = c(rep(0, num_prev_files), rep(0, num_new_files)) @@ -1975,8 +1976,8 @@ online_iNMF <- function(object, } else { scale_param = c(rep(0, num_prev_files), rep((iter - 2) / (iter - 1), num_new_files)) } - - + + if (epoch[file_idx_new[1]] > 0 & epoch_next[file_idx_new[1]] == TRUE){ # remove information older than 2 epochs for (i in file_idx_new){ object@A[[i]] = object@A[[i]] - A_old[[i]] @@ -1990,19 +1991,19 @@ online_iNMF <- function(object, B_old[[i]] = scale_param[i] * B_old[[i]] } } - + for (i in file_idx_new){ object@A[[i]] = scale_param[i] * object@A[[i]] + H_minibatch[[i]] %*% t(H_minibatch[[i]]) / minibatch_sizes[i] # HiHit diag(object@A[[i]])[diag(object@A[[i]])==0] = 1e-15 object@B[[i]] = scale_param[i] * object@B[[i]] + X_minibatch[[i]] %*% t(H_minibatch[[i]]) / minibatch_sizes[i] # XiHit } - - + + # update W, V_i by HALS iter_miniBatch = 1 delta_miniBatch = Inf max_iters_miniBatch = miniBatch_max_iters - + while(iter_miniBatch <= max_iters_miniBatch){ # update W for (j in 1:k){ @@ -2012,10 +2013,10 @@ online_iNMF <- function(object, W_update_numerator = W_update_numerator + object@B[[i]][, j] - (object@W + object@V[[i]]) %*% object@A[[i]][, j] W_update_denominator = W_update_denominator + object@A[[i]][j,j] } - + object@W[, j] = nonneg(object@W[, j] + W_update_numerator / W_update_denominator) } - + # update V_i for (j in 1:k){ for (i in file_idx_new){ @@ -2023,7 +2024,7 @@ online_iNMF <- function(object, ((1 + lambda) * object@A[[i]][j, j])) } } - + iter_miniBatch = iter_miniBatch + 1 } epoch_next = rep(FALSE, num_files) # reset epoch change indicator @@ -2056,15 +2057,15 @@ online_iNMF <- function(object, } colnames(object@H[[i]]) = cell_barcodes[[i]] } - + rownames(object@W) = gene_names colnames(object@W) = NULL - + for (i in file_idx){ rownames(object@V[[i]]) = gene_names colnames(object@V[[i]]) = NULL } - + } else { if (verbose) { cat("Metagene projection", "\n") @@ -2090,7 +2091,7 @@ online_iNMF <- function(object, object@V[[i]] = matrix(0, num_genes, k) } } - + # gene x k -> k x gene & k x cell -> cell x k object@W = t(object@W) object@V = lapply(object@V, t) @@ -2098,7 +2099,7 @@ online_iNMF <- function(object, for (i in 1:length(object@raw.data)){ if (class(object@raw.data[[i]])[1] != "H5File") object@scale.data[[i]] = t(object@scale.data[[i]]) } - + if (!is.null(X_new)){ names(object@scale.data) <- names(object@raw.data) <- c(names(raw.data_prev), names(X_new)) } @@ -2155,6 +2156,7 @@ nonneg <- function(x, eps = 1e-16) { #' @param V.init Initial values to use for V matrices (default NULL) #' @param rand.seed Random seed to allow reproducible results (default 1). #' @param print.obj Print objective function values after convergence (default FALSE). +#' @param use.unshared Whether to run UANLS method to integrate datasets with previously identified unshared variable genes. Have to run selectGenes with unshared = TRUE and scaleNotCenter it. (default FALSE). #' @param verbose Print progress bar/messages (TRUE by default) #' @param ... Arguments passed to other methods #' @@ -2164,7 +2166,7 @@ nonneg <- function(x, eps = 1e-16) { #' @examples #' \dontrun{ #' # Requires preprocessed liger object (only for objected not based on HDF5 files) -#' # Get factorization using 20 factors and mini-batch of 5000 cells +#' # Get factorization using 20 factors and mini-batch of 5000 cells #' # (default setting, can be adjusted for ideal results) #' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) #' } @@ -2194,7 +2196,6 @@ optimizeALS.list <- function( W.init = NULL, V.init = NULL, use.unshared = FALSE, - lamda.u = NULL, rand.seed = 1, print.obj = FALSE, verbose = TRUE, @@ -2238,7 +2239,7 @@ optimizeALS.list <- function( nrow = k, ncol = g ) - + V <- lapply( X = 1:N, FUN = function(i) { @@ -2249,7 +2250,7 @@ optimizeALS.list <- function( )) } ) - + H <- lapply( X = ns, FUN = function(n) { @@ -2369,7 +2370,7 @@ optimizeALS.list <- function( sep = "" ) } - + if (verbose) { if (print.obj) { cat("Objective:", obj, "\n") @@ -2410,7 +2411,7 @@ optimizeALS.liger <- function( verbose = TRUE, ... ) { - + if (use.unshared == FALSE){ object <- removeMissingObs( object = object, @@ -2495,7 +2496,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters H <- object@H W <- object@W V <- object@V - + if (k.new > k) { set.seed(rand.seed) sqrt_lambda <- sqrt(lambda) @@ -2559,7 +2560,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters }) } object <- optimizeALS(object, k.new, - lambda = lambda, thresh = thresh, max.iters = max.iters, H.init = H, + lambda = lambda, thresh = thresh, max.iters = max.iters, H.init = H, W.init = W, V.init = V, rand.seed = rand.seed, verbose = verbose) return(object) } @@ -2737,7 +2738,7 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la object@scale.data[[i]] <- t(object@norm.data[[i]][object@var.genes, ]) } } - + names(object@raw.data) <- names(object@norm.data) <- names(object@H) <- old_names k <- ncol(H[[1]]) object <- optimizeALS(object, k = k, lambda = lambda, thresh = thresh, max.iters = max.iters, @@ -2828,7 +2829,7 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' suggestLambda(ligerex, k = 20, num.cores = 4) #' } -suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, +suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { if (is.null(lambda.test)) { @@ -2888,26 +2889,26 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core parallel::stopCluster(cl) rep_data[[r]] <- data_matrix } - + aligns <- Reduce(cbind, rep_data) if (is.null(dim(aligns))) { aligns <- matrix(aligns, ncol = 1) } mean_aligns <- apply(aligns, 1, mean) - + time_elapsed <- difftime(Sys.time(), time_start, units = "auto") if (verbose) { cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) } # make dataframe df_al <- data.frame(align = mean_aligns, lambda = lambda.test) - + p1 <- ggplot(df_al, aes_string(x = 'lambda', y = 'mean_aligns')) + geom_line(size=1) + geom_point() + theme_classic() + labs(y = 'Alignment', x = 'Lambda') + guides(col = guide_legend(title = "", override.aes = list(size = 2))) + theme(legend.position = 'top') - + if (return.data) { print(p1) if (return.raw) { @@ -3021,13 +3022,13 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, data_matrix <- data_matrix[nrow(data_matrix):1, ] rep_data[[r]] <- data_matrix } - + medians <- Reduce(cbind, lapply(rep_data, function(x) {apply(x, 1, median)})) if (is.null(dim(medians))) { medians <- matrix(medians, ncol = 1) } mean_kls <- apply(medians, 1, mean) - + time_elapsed <- difftime(Sys.time(), time_start, units = "auto") if (verbose) { cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) @@ -3038,13 +3039,13 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, if (!plot.log2) { df_kl <- df_kl[df_kl$calc == 'KL_div', ] } - + p1 <- ggplot(df_kl, aes_string(x = 'k', y = 'median_kl', col = 'calc')) + geom_line(size=1) + geom_point() + theme_classic() + labs(y='Median KL divergence (across all cells)', x = 'K') + guides(col=guide_legend(title="", override.aes = list(size = 2))) + theme(legend.position = 'top') - + if (return.data) { print(p1) if (return.raw) { @@ -3157,7 +3158,7 @@ quantile_norm.list <- function( labels <- lapply(object, max_factor, dims_use = use_these_factors, center_cols = do.center) clusters <- as.factor(unlist(lapply(labels, as.character))) names(clusters) <- unlist(lapply(object, rownames)) - + # increase robustness of cluster assignments using knn graph if (refine.knn) { clusters <- refine_clusts_knn(object, clusters, k = knn_k, eps = eps) @@ -3168,7 +3169,7 @@ quantile_norm.list <- function( }) names(clusters) <- names(object) dims <- ncol(object[[ref_dataset]]) - + dataset <- unlist(lapply(1:length(object), function(i) { rep(names(object)[i], nrow(object[[i]])) })) @@ -3288,13 +3289,13 @@ louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps output_path <- paste0('edge_', sub('\\s', '_', Sys.time()), '.txt') output_path = sub(":","_",output_path) output_path = sub(":","_",output_path) - + if (is.null(dims.use)) { use_these_factors <- 1:ncol(object@H[[1]]) } else { use_these_factors <- dims.use } - + if (dim(object@H.norm)[1] == 0){ if (verbose) { message("Louvain Clustering on unnormalized cell factor loadings.") @@ -3410,7 +3411,7 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor cat("NOTE: This function will discard the raw data previously stored in the liger object and", "replace the raw.data slot with the imputed data.\n\n") } - + if (length(reference) > 1) { stop("Can only have ONE reference dataset") } @@ -3444,11 +3445,11 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } } } - + reference_cells <- colnames(object@raw.data[[reference]]) # cells by genes for (query in queries) { query_cells <- colnames(object@raw.data[[query]]) - + # creating a (reference cell numbers X query cell numbers) weights matrix for knn weights and unit weights nn.k <- get.knnx(object@H.norm[reference_cells, ], object@H.norm[query_cells, ], k = knn_k, algorithm = "CR") weights <- Matrix(0, nrow = ncol(object@raw.data[[reference]]), ncol = nrow(nn.k$nn.index), sparse = TRUE) @@ -3463,13 +3464,13 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor weights[nn.k$nn.index[n, ], n] <- 1/knn_k # simply count the mean } } - + # (genes by ref cell num) multiply by the weight matrix (ref cell num by query cell num) imputed_vals <- object@raw.data[[reference]] %*% weights # assigning dimnames colnames(imputed_vals) <- query_cells rownames(imputed_vals) <- rownames(object@raw.data[[reference]]) - + # formatiing the matrix if (class(object@raw.data[[reference]])[1] == "dgTMatrix" | class(object@raw.data[[reference]])[1] == "dgCMatrix") { @@ -3477,10 +3478,10 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } else { imputed_vals <- as.matrix(imputed_vals) } - + object@raw.data[[query]] <- imputed_vals } - + if (norm) { if (verbose) { cat('\nNormalizing data...\n') @@ -3493,7 +3494,7 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } object <- rliger::scaleNotCenter(object) } - + return(object) } @@ -3536,7 +3537,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { stop("Should have at least TWO inputs to compare between datasets") } } - + if (class(object@raw.data[[1]])[1] == "H5File"){ if (is.null(object@h5file.info[[1]][["sample.data.type"]])){ message("Need to sample data before Wilcoxon test for HDF5 input.") @@ -3544,7 +3545,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { message("Running Wilcoxon test on ", object@h5file.info[[1]][["sample.data.type"]]) } } - + ### create feature x sample matrix if (data.use[1] == "all" | length(data.use) > 1) { # at least two datasets if (data.use[1] == "all") { @@ -3588,7 +3589,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { clusters <- object@clusters[colnames(object@sample.data[[data.use]]), drop = TRUE] # from which cluster } } - + ### perform wilcoxon test if (compare.method == "clusters") { # compare between clusters across datasets len <- nrow(feature_matrix) @@ -3601,7 +3602,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { results <- wilcoxauc(log(feature_matrix + 1e-10), clusters) } } - + if (compare.method == "datasets") { # compare between datasets within each cluster results <- Reduce(rbind, lapply(levels(clusters), function(cluster) { sub_barcodes <- names(clusters[clusters == cluster]) # every barcode within this cluster @@ -3641,7 +3642,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { #' @examples #' \dontrun{ #' # some gene counts matrix: gmat.small -#' # some peak counts matrix: pmat.small +#' # some peak counts matrix: pmat.small #' regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", #' alpha = 0.05, path_to_coords = 'some_path') #' } @@ -3655,14 +3656,14 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist call. = FALSE ) } - + if (!requireNamespace("IRanges", quietly = TRUE)) { stop("Package \"IRanges\" needed for this function to work. Please install it by command:\n", "BiocManager::install('IRanges')", call. = FALSE ) } - + ### make Granges object for peaks peak.names <- strsplit(rownames(peak_counts), "[:-]") chrs <- Reduce(append, lapply(peak.names, function(peak) { @@ -3678,7 +3679,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist seqnames = chrs, ranges = IRanges::IRanges(as.numeric(chrs.start), end = as.numeric(chrs.end)) ) - + ### make Granges object for genes gene.names <- read.csv2(path_to_coords, sep = "\t", header = FALSE, stringsAsFactors = FALSE) gene.names <- gene.names[complete.cases(gene.names), ] @@ -3687,11 +3688,11 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist ranges = IRanges::IRanges(as.numeric(gene.names$V2), end = as.numeric(gene.names$V3)) ) names(genes.coords) <- gene.names$V4 - + ### construct regnet gene_counts <- t(gene_counts) # cell x genes peak_counts <- t(peak_counts) # cell x genes - + # find overlap peaks for each gene if (missing(genes.list)) { genes.list <- colnames(gene_counts) @@ -3702,13 +3703,13 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist } genes.list <- genes.list[!missing_genes] genes.coords <- genes.coords[genes.list] - + if (verbose) { message("Calculating correlation for gene-peak pairs...") } each.len <- 0 # assign('each.len', 0, envir = globalenv()) - + elements <- lapply(seq(length(genes.list)), function(pos) { gene.use <- genes.list[pos] # re-scale the window for each gene @@ -3729,7 +3730,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist )) pick <- res[["p"]] < alpha # filter by p-value pick[is.na(pick)] <- FALSE - + if (sum(pick) == 0) { # if no peaks are important, skip this iteration return(list(NULL, as.numeric(each.len), NULL)) } @@ -3741,7 +3742,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist assign('each.len', each.len + length(peaks.use), envir = parent.frame(2)) return(list(as.numeric(peaks.use), as.numeric(each.len), res.corr)) }) - + i_index <- Reduce(append, lapply(elements, function(ele) { ele[[1]] })) @@ -3751,14 +3752,14 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist value_list <- Reduce(append, lapply(elements, function(ele) { ele[[3]] })) - + # make final sparse matrix regnet <- sparseMatrix( i = i_index, p = p_index, x = value_list, dims = c(ncol(peak_counts), length(genes.list)), dimnames = list(colnames(peak_counts), genes.list) ) - + return(regnet) } @@ -3775,14 +3776,14 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist #' @param path_to_coords Path to the gene coordinates file. #' #' @return An Interact Track file stored in the specified path. -#' +#' #' @importFrom stats complete.cases #' @importFrom utils write.table #' #' @export #' @examples #' \dontrun{ -#' # some gene-peak correlation matrix: regent +#' # some gene-peak correlation matrix: regent #' makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') #' } @@ -3791,7 +3792,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) if (missing(path_to_coords)) { stop("Parameter 'path_to_coords' cannot be empty.") } - + ### make Granges object for genes genes.coords <- read.csv2(path_to_coords, sep = "\t", header = FALSE, colClasses = @@ -3799,7 +3800,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) ) genes.coords <- genes.coords[complete.cases(genes.coords$V4), ] rownames(genes.coords) <- genes.coords[, 4] - + # split peak names into chrom and coordinates peak.names <- strsplit(rownames(corr.mat), "[:-]") chrs <- Reduce(append, lapply(peak.names, function(peak) { @@ -3811,25 +3812,25 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) chrs.end <- as.numeric(Reduce(append, lapply(peak.names, function(peak) { peak[3] }))) - + # check genes.list if (missing(genes.list)) { genes.list <- colnames(corr.mat) } - + # check output_path if (missing(output_path)) { output_path <- getwd() } - + output_path <- paste0(output_path, "/Interact_Track.bed") track.doc <- paste0('track type=interact name="Interaction Track" description="Gene-Peaks Links"', ' interactDirectional=true maxHeightPixels=200:100:50 visibility=full') write(track.doc, file = output_path) - + genes_not_existed <- 0 filtered_genes <- 0 - + for (gene in genes.list) { if (!gene %in% colnames(corr.mat)) { # if gene not in the corr.mat genes_not_existed <- genes_not_existed + 1 @@ -3840,7 +3841,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) filtered_genes <- filtered_genes + 1 next } - + track <- data.frame( chrom = chrs[peaks.sel], chromStart = chrs.start[peaks.sel], @@ -3868,7 +3869,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) fileEncoding = "" ) } - + message("A total of ", genes_not_existed, " genes do not exist in input matrix.") message("A total of ", filtered_genes, " genes do not have significant correlated peaks.") message("The Interaction Track is stored in Path: ", output_path) @@ -3907,29 +3908,29 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen call. = FALSE ) } - + if (!requireNamespace("reactome.db", quietly = TRUE)) { stop("Package \"reactome.db\" needed for this function to work. Please install it by command:\n", "BiocManager::install('reactome.db')", call. = FALSE ) } - + if (!requireNamespace("fgsea", quietly = TRUE)) { stop("Package \"fgsea\" needed for this function to work. Please install it by command:\n", "BiocManager::install('fgsea')", call. = FALSE ) } - + if (length(mat_v) > length(object@V)) { stop("The gene loading input is invalid.", call. = FALSE) } - + if (!.hasSlot(object, "W") | !.hasSlot(object, "V")) { stop("There is no W or V matrix. Please do iNMF first.", call. = FALSE) } - + if (mat_w) { gene_loadings <- object@W if (mat_v) { @@ -3942,11 +3943,11 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen object@V[[v]] })) } - + gene_ranks <- t(apply(gene_loadings, MARGIN = 1, function(x) { rank(x) })) - + colnames(gene_ranks) <- sapply(colnames(gene_ranks), toupper) gene_id <- as.character(AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db, colnames(gene_ranks), "ENTREZID", "SYMBOL")) colnames(gene_ranks) <- gene_id @@ -4237,7 +4238,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali stop("HDF5-based Liger object requires sampled scale.data for calculating agreement.") } } - + message("Reducing dimensionality using ", dr.method) set.seed(rand.seed) dr <- list() @@ -4258,7 +4259,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali for (i in 1:length(object@H)){ dr[[i]] = icafast(t(object@sample.data[[i]]), nc = ndims)$S } - + } else { dr <- lapply(object@scale.data, function(x) { icafast(x, nc = ndims)$S @@ -4274,7 +4275,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali )$rotation) rownames(dr[[i]]) = colnames(object@sample.data[[i]]) } - + } else { dr <- lapply(object@scale.data, function(x) { suppressWarnings(prcomp_irlba(t(x), @@ -4291,7 +4292,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali n <- sum(ns) jaccard_inds <- c() distorts <- c() - + for (i in 1:length(dr)) { jaccard_inds_i <- c() if (use.aligned) { @@ -4308,7 +4309,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali })) jaccard_inds_i <- jaccard_inds_i[is.finite(jaccard_inds_i)] jaccard_inds <- c(jaccard_inds, jaccard_inds_i) - + distorts <- c(distorts, mean(jaccard_inds_i)) } if (by.dataset) { @@ -4412,10 +4413,10 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel } names(dataset) <- rownames(nmf_factors) dataset <- dataset[sampled_cells] - + num_sampled <- N * min_cells num_same_dataset <- rep(k, num_sampled) - + alignment_per_cell <- c() for (i in 1:num_sampled) { inds <- knn_graph$nn.index[i, ] @@ -4559,7 +4560,7 @@ calcPurity <- function(object, classes.compare, verbose = TRUE) { } clusters <- object@clusters[names(classes.compare)] purity <- sum(apply(table(classes.compare, clusters), 2, max)) / length(clusters) - + return(purity) } @@ -4589,7 +4590,7 @@ getProportionMito <- function(object, use.norm = FALSE) { percent_mito <- unlist(lapply(unname(data.use), function(x) { colSums(x[mito.genes, ]) / colSums(x) }), use.names = TRUE) - + return(percent_mito) } @@ -4620,12 +4621,12 @@ getProportionMito <- function(object, use.norm = FALSE) { #' @param new.order new dataset factor order for plotting. must set reorder.idents = TRUE. #' @param return.plots Return ggplot plot objects instead of printing directly (default FALSE). #' @param legend.fonts.size Controls the font size of the legend. -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle guides guide_legend aes theme xlab ylab #' @importFrom dplyr %>% group_by summarize #' @importFrom scattermore geom_scattermore @@ -4657,7 +4658,7 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si raster <- FALSE } } - + tsne_df <- data.frame(object@tsne.coords) colnames(tsne_df) <- c("Dim1", "Dim2") tsne_df[['Dataset']] <- unlist(lapply(1:length(object@H), function(x) { @@ -4683,18 +4684,18 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si idx <- sample(1:nrow(tsne_df)) tsne_df <- tsne_df[idx, ] } - - + + if (isTRUE(x = raster)) { p1 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Dataset')) + theme_bw() + theme_cowplot(legend.fonts.size) + geom_scattermore(pointsize = pt.size) + guides(color = guide_legend(override.aes = list(size = legend.size))) - + centers <- tsne_df %>% group_by(.data[['Cluster']]) %>% summarize( Dim1 = median(x = .data[['Dim1']]), Dim2 = median(x = .data[['Dim2']]) ) - + p2 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Cluster')) + theme_cowplot(legend.fonts.size) + geom_scattermore(pointsize = pt.size) + geom_text(data = centers, mapping = aes_string(label = 'Cluster'), colour = "black", size = text.size) + @@ -4703,19 +4704,19 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si p1 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Dataset')) + theme_bw() + theme_cowplot(legend.fonts.size) + geom_point(size = pt.size, stroke = 0.2) + guides(color = guide_legend(override.aes = list(size = legend.size))) - + centers <- tsne_df %>% group_by(.data[['Cluster']]) %>% summarize( Dim1 = median(x = .data[['Dim1']]), Dim2 = median(x = .data[['Dim2']]) ) - + p2 <- ggplot(tsne_df, aes_string(x = 'Dim1', y = 'Dim2', color = 'Cluster')) + theme_cowplot(legend.fonts.size) + geom_point(size = pt.size, stroke = 0.2) + geom_text(data = centers, mapping = aes_string(label = 'Cluster'), colour = "black", size = text.size) + guides(color = guide_legend(override.aes = list(size = legend.size))) } - - + + if (!is.null(title)) { p1 <- p1 + ggtitle(title[1]) p2 <- p2 + ggtitle(title[2]) @@ -4767,12 +4768,12 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle aes guides guide_legend labs #' scale_color_viridis_c scale_color_gradientn theme xlab ylab #' @importFrom dplyr %>% group_by summarize #' @importFrom stats median -#' +#' #' @export #' @examples #' \dontrun{ @@ -4816,7 +4817,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit p_list <- list() for (sub_df in split(dr_df, f = dr_df$dataset)) { ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'feature')) + geom_point(size = pt.size) - + # if data is discrete if (discrete) { ggp <- ggp + guides(color = guide_legend(override.aes = list(size = legend.size))) + @@ -4838,7 +4839,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit ggp <- ggp + scale_color_gradientn(colors = cols.use, na.value = zero.color) + labs(col = feature) } - + } if (by.dataset) { base <- as.character(sub_df$dataset[1]) @@ -4860,7 +4861,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit if (by.dataset) { p_list <- p_list[names(object@raw.data)] } - + if (return.plots){ if (length(p_list) == 1) { return(p_list[[1]]) @@ -4919,18 +4920,18 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn Hs_norm <- object@H.norm # restore default settings when the current function exits init_par <- graphics::par(no.readonly = TRUE) - on.exit(graphics::par(init_par)) + on.exit(graphics::par(init_par)) for (i in 1:k) { graphics::par(mfrow = c(2, 1)) top_genes.W <- rownames(W)[order(W[, i], decreasing = TRUE)[1:num.genes]] top_genes.W.string <- paste0(top_genes.W, collapse = ", ") factor_textstring <- paste0("Factor", i) - + plot_title1 <- paste(factor_textstring, "\n", top_genes.W.string, "\n") cols <- rep("gray", times = nrow(Hs_norm)) names(cols) <- rownames(Hs_norm) cols.use <- grDevices::rainbow(length(object@H)) - + for (cl in 1:length(object@H)) { cols[rownames(object@H[[cl]])] <- rep(cols.use[cl], times = nrow(object@H[[cl]])) } @@ -5013,7 +5014,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] } - + if(class(object@raw.data[[1]])[1] == "H5File"){ sample.idx = unlist(lapply(object@sample.data, colnames)) H_aligned = object@H.norm[sample.idx, ] @@ -5022,16 +5023,16 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = H_aligned <- object@H.norm tsne_coords <- object@tsne.coords } - + W <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) W <- pmin(W + V1, W + V2) - + dataset.specificity <- calcDatasetSpecificity(object, dataset1 = dataset1, dataset2 = dataset2, do.plot = do.spec.plot) factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - + markers <- getFactorMarkers(object, dataset1 = dataset1, dataset2 = dataset2, factor.share.thresh = factor.share.thresh, num.genes = num.genes, log.fc.thresh = log.fc.thresh, @@ -5039,7 +5040,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = dataset.specificity = dataset.specificity, verbose = verbose ) - + rownames(W) <- rownames(V1) <- rownames(V2) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) @@ -5054,11 +5055,11 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = factor_ds <- paste("Factor", i, "Dataset Specificity:", dataset.specificity[[3]][i]) p1 <- ggplot(tsne_df, aes_string(x = "Dim1", y = "Dim2", color = factorlab)) + geom_point() + scale_color_gradient(low = "yellow", high = "red") + ggtitle(label = factor_ds) - + top_genes_V1 <- markers[[1]]$gene[markers[[1]]$factor_num == i] top_genes_W <- markers[[2]]$gene[markers[[2]]$factor_num == i] top_genes_V2 <- markers[[3]]$gene[markers[[3]]$factor_num == i] - + top_genes_list <- list(top_genes_V1, top_genes_W, top_genes_V2) plot_list <- lapply(seq_along(top_genes_list), function(x) { top_genes <- top_genes_list[[x]] @@ -5077,7 +5078,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = labs(x = "", y = "") + ggtitle(label = names_list[[x]]) + coord_fixed() + ggplot2::theme_void() return(out_plot) }) - + p2 <- (plot_grid(plotlist = plot_list, align = "hv", nrow = 1) + draw_grob(roundrectGrob( x = 0.33, y = 0.5, width = 0.67, height = 0.70, @@ -5135,7 +5136,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' @param axis.labels Vector of two strings to use as x and y labels respectively (default NULL). #' @param do.title Include top title with cluster and Dataset Specificity (default FALSE). #' @param verbose Print progress bar/messages (TRUE by default) -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to @@ -5180,12 +5181,12 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes raster <- FALSE } } - + if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] } - + if(class(object@raw.data[[1]])[1] == "H5File"){ sample.idx = unlist(lapply(object@sample.data, colnames)) H_aligned = object@H.norm[sample.idx, ] @@ -5194,20 +5195,20 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes H_aligned <- object@H.norm tsne_coords <- object@tsne.coords } - + W_orig <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) W <- pmin(W_orig + V1, W_orig + V2) - + dataset.specificity <- calcDatasetSpecificity(object, dataset1 = dataset1, dataset2 = dataset2, do.plot = do.spec.plot ) - + factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - - + + markers <- getFactorMarkers(object, dataset1 = dataset1, dataset2 = dataset2, factor.share.thresh = factor.share.thresh, @@ -5216,7 +5217,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes dataset.specificity = dataset.specificity, verbose = verbose ) - + rownames(W) <- rownames(V1) <- rownames(V2) <- rownames(W_orig) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) @@ -5237,7 +5238,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } else { values <- NULL } - + if (isTRUE(x = raster)) { p1 <- ggplot(tsne_df, aes_string(x = "Dim1", y = "Dim2", color = factorlab)) + geom_scattermore(pointsize = pt.size) + @@ -5257,15 +5258,15 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes ) + theme_cowplot(12) } - - + + if (!is.null(axis.labels)) { p1 <- p1 + xlab(axis.labels[1]) + ylab(axis.labels[2]) } if (do.title) { p1 <- p1 + ggtitle(label = factor_ds) } - + # subset to specific factor and sort by p-value top_genes_V1 <- markers[[1]][markers[[1]]$factor_num == i, ] top_genes_V1 <- top_genes_V1[order(top_genes_V1$p_value), ]$gene @@ -5273,10 +5274,10 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes top_genes_W <- markers[[2]][markers[[2]]$factor_num == i, ]$gene top_genes_V2 <- markers[[3]][markers[[3]]$factor_num == i, ] top_genes_V2 <- top_genes_V2[order(top_genes_V2$p_value), ]$gene - + top_genes_list <- list(top_genes_V1, top_genes_W, top_genes_V2) # subset down to those which will be shown if sorting by p-val - + top_genes_list <- lapply(top_genes_list, function(x) { if (length(x) > num.genes.show) { # to avoid subset warning @@ -5284,7 +5285,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } x }) - + plot_list <- lapply(seq_along(top_genes_list), function(x) { top_genes <- top_genes_list[[x]] # make dataframe for cum gene loadings plot @@ -5296,7 +5297,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes if (length(top_genes) == 0) { top_genes <- c("no genes") } - + gene_df <- data.frame( loadings = sorted, xpos = seq(0, 1, length.out = length(sorted)), @@ -5304,7 +5305,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes ) y_lim_text <- max(gene_df$loadings) # plot and annotate with top genes - + out_plot <- ggplot(gene_df, aes_string(x = 'xpos', y = 'loadings')) + geom_point(size = pt.size) + theme_bw() + @@ -5327,7 +5328,7 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes clip = "off" ) + theme(plot.margin = unit(c(1, 4, 1, 1), "lines")) - + if (mark.top.genes) { out_plot <- out_plot + geom_point( data = subset(gene_df, gene_df[['top_k']] == TRUE), @@ -5337,9 +5338,9 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes } return(out_plot) }) - + # p2 <- plot_grid(plotlist = plot_list, nrow = 1) - + return_plots[[i]] <- p1 / (plot_list[[1]] | plot_list[[2]] | plot_list[[3]]) # if can figure out how to make cowplot work, might bring this back # return_plots[[i]] <- plot_grid(p1, p2, nrow = 2, align = "h") @@ -5392,11 +5393,11 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, stop("norm.data should be sampled for making violin plots.") } } - + gene_vals <- c() gene_df <- data.frame(object@tsne.coords) rownames(gene_df) <- names(object@clusters) - + for (i in 1:length(object@raw.data)) { if (class(object@raw.data[[i]])[1] == "H5File"){ if (i %in% methylation.indices) { @@ -5426,7 +5427,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, } } } - + gene_df$Gene <- as.numeric(gene_vals[rownames(gene_df)]) colnames(gene_df) <- c("Dim1", "Dim2", "gene") gene_plots <- list() @@ -5503,7 +5504,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' @param do.legend Display legend on plots (default TRUE). #' @param return.plots Return ggplot objects instead of printing directly (default FALSE). #' @param keep.scale Maintain min/max color scale across all plots when using plot.by (default FALSE) -#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if +#' @param raster Rasterization of points (default NULL). Automatically convert to raster format if #' there are over 100,000 cells to plot. #' #' @return If returning single plot, returns ggplot object; if returning multiple plots; returns @@ -5538,7 +5539,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by warning("Provided values for plot.by and scale.by do not match; results may not be very interpretable.") } - + # check raster and set by number of cells total if NULL if (is.null(x = raster)) { if (nrow(x = object@cell.data) > 1e5) { @@ -5549,8 +5550,8 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by raster <- FALSE } } - - + + if (use.raw) { if (is.null(log2scale)) { log2scale <- FALSE @@ -5622,7 +5623,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by max_exp_val <- max(gene_vals, na.rm = TRUE) min_exp_val <- min(gene_vals, na.rm = TRUE) } - + if (class(object@raw.data[[1]])[1] == "H5File") { cells <- unlist(lapply(object@sample.data, colnames)) dr_df <- data.frame(object@tsne.coords[cells,]) @@ -5635,7 +5636,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by # get dr limits for later lim1 <- c(min(dr_df$dr1), max(dr_df$dr1)) lim2 <- c(min(dr_df$dr2), max(dr_df$dr2)) - + if (plot.by != 'none') { if (!(plot.by %in% colnames(object@cell.data))) { stop("Please select existing feature in cell.data to plot.by, or add it before calling.") @@ -5683,7 +5684,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by } sub_df$gene[sub_df$gene < min_v & !is.na(sub_df$gene)] <- min_v sub_df$gene[sub_df$gene > max_v & !is.na(sub_df$gene)] <- max_v - + if (isTRUE(x = raster)) { ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'gene')) + geom_scattermore(pointsize = pt.size) + labs(col = gene) @@ -5691,7 +5692,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by ggp <- ggplot(sub_df, aes_string(x = 'dr1', y = 'dr2', color = 'gene')) + geom_point(size = pt.size) + labs(col = gene) } - + if (!is.null(cols.use)) { if (keep.scale) { ggp <- ggp + scale_color_gradientn(colors = cols.use, @@ -5716,14 +5717,14 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by if (set.dr.lims) { ggp <- ggp + xlim(lim1) + ylim(lim2) } - + if (plot.by != 'none') { base <- as.character(sub_df$plotby[1]) } else { base <- "" } ggp <- ggp + ggtitle(base) - + if (!is.null(axis.labels)) { ggp <- ggp + xlab(axis.labels[1]) + ylab(axis.labels[2]) } @@ -5746,7 +5747,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by if (plot.by == 'dataset') { p_list <- p_list[names(object@raw.data)] } - + if (return.plots){ if (length(p_list) == 1) { return(p_list[[1]]) @@ -5819,11 +5820,9 @@ plotGenes <- function(object, genes, ...) { #' @param node.order Order of clusters in each set (list with three vectors of ordinal numbers). #' By default will try to automatically order them appropriately. #' -#' @return A riverplot object +#' @return NULL for now. Could be back if CRAN dependency riverplot is back. #' #' @importFrom plyr mapvalues -#' @importFrom riverplot makeRiver -#' @importFrom riverplot riverplot #' @importFrom grDevices hcl #' @importFrom utils capture.output #' @@ -5844,122 +5843,124 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, min.cells = 10, river.yscale = 1, river.lty = 0, river.node_margin = 0.1, label.cex = 1, label.col = "black", lab.srt = 0, river.usr = NULL, node.order = "auto") { - cluster1 <- droplevels(cluster1) - cluster2 <- droplevels(cluster2) - if (is.null(cluster_consensus)) { - cluster_consensus <- droplevels(object@clusters) - } - # Make cluster names unique if necessary - if (length(intersect(levels(cluster1), levels(cluster2))) > 0 | - length(intersect(levels(cluster1), levels(cluster_consensus))) > 0 | - length(intersect(levels(cluster2), levels(cluster_consensus))) > 0) { - message("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") - cluster1 <- mapvalues(cluster1, from = levels(cluster1), - to = paste("1", levels(cluster1), sep = "-")) - cluster2 <- mapvalues(cluster2, from = levels(cluster2), - to = paste("2", levels(cluster2), sep = "-")) - } - cluster1 <- cluster1[intersect(names(cluster1), names(cluster_consensus))] - cluster2 <- cluster2[intersect(names(cluster2), names(cluster_consensus))] - - # set node order - if (identical(node.order, "auto")) { - tab.1 <- table(cluster1, cluster_consensus[names(cluster1)]) - tab.1 <- sweep(tab.1, 1, rowSums(tab.1), "/") - tab.2 <- table(cluster2, cluster_consensus[names(cluster2)]) - tab.2 <- sweep(tab.2, 1, rowSums(tab.2), "/") - whichmax.1 <- apply(tab.1, 1, which.max) - whichmax.2 <- apply(tab.2, 1, which.max) - ord.1 <- order(whichmax.1) - ord.2 <- order(whichmax.2) - cluster1 <- factor(cluster1, levels = levels(cluster1)[ord.1]) - cluster2 <- factor(cluster2, levels = levels(cluster2)[ord.2]) - } else { - if (is.list(node.order)) { - cluster1 <- factor(cluster1, levels = levels(cluster1)[node.order[[1]]]) - cluster_consensus <- factor(cluster_consensus, - levels = levels(cluster_consensus)[node.order[[2]]]) - cluster2 <- factor(cluster2, levels = levels(cluster2)[node.order[[3]]]) - } - } - cluster1 <- cluster1[!is.na(cluster1)] - cluster2 <- cluster2[!is.na(cluster2)] - nodes1 <- levels(cluster1)[table(cluster1) > 0] - nodes2 <- levels(cluster2)[table(cluster2) > 0] - nodes_middle <- levels(cluster_consensus)[table(cluster_consensus) > 0] - node_Xs <- c( - rep(1, length(nodes1)), rep(2, length(nodes_middle)), - rep(3, length(nodes2)) - ) - - # first set of edges - edge_list <- list() - for (i in 1:length(nodes1)) { - temp <- list() - i_cells <- names(cluster1)[cluster1 == nodes1[i]] - for (j in 1:length(nodes_middle)) { - if (length(which(cluster_consensus[i_cells] == nodes_middle[j])) / length(i_cells) > min.frac & - length(which(cluster_consensus[i_cells] == nodes_middle[j])) > min.cells) { - temp[[nodes_middle[j]]] <- sum(cluster_consensus[i_cells] == - nodes_middle[j]) / length(cluster1) - } - } - edge_list[[nodes1[i]]] <- temp - } - # second set of edges - cluster3 <- cluster_consensus[names(cluster2)] - for (i in 1:length(nodes_middle)) { - temp <- list() - i_cells <- names(cluster3)[cluster3 == nodes_middle[i]] - for (j in 1:length(nodes2)) { - j_cells <- names(cluster2)[cluster2 == nodes2[j]] - if (length(which(cluster_consensus[j_cells] == nodes_middle[i])) / length(j_cells) > min.frac & - length(which(cluster_consensus[j_cells] == nodes_middle[i])) > min.cells) { - if (!is.na(sum(cluster2[i_cells] == nodes2[j]))) { - temp[[nodes2[j]]] <- sum(cluster2[i_cells] == - nodes2[j]) / length(cluster2) - } - } - } - edge_list[[nodes_middle[i]]] <- temp - } - # set cluster colors - node_cols <- list() - ggplotColors <- function(g) { - d <- 360 / g - h <- cumsum(c(15, rep(d, g - 1))) - grDevices::hcl(h = h, c = 100, l = 65) - } - pal <- ggplotColors(length(nodes1)) - for (i in 1:length(nodes1)) { - node_cols[[nodes1[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - pal <- ggplotColors(length(nodes_middle)) - for (i in 1:length(nodes_middle)) { - node_cols[[nodes_middle[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - pal <- ggplotColors(length(nodes2)) - for (i in 1:length(nodes2)) { - node_cols[[nodes2[i]]] <- list(col = pal[i], textcex = label.cex, - textcol = label.col, srt = lab.srt) - } - # create nodes and riverplot object - nodes <- list(nodes1, nodes_middle, nodes2) - node.limit <- max(unlist(lapply(nodes, length))) - - node_Ys <- lapply(1:length(nodes), function(i) { - seq(1, node.limit, by = node.limit / length(nodes[[i]])) - }) - rp <- makeRiver(c(nodes1, nodes_middle, nodes2), edge_list, - node_xpos = node_Xs, node_ypos = unlist(node_Ys), node_styles = node_cols - ) + .Deprecated(NULL, msg = "Cran package riverplot is archived, we have to disable this function for now.") + return(NULL) + # cluster1 <- droplevels(cluster1) + # cluster2 <- droplevels(cluster2) + # if (is.null(cluster_consensus)) { + # cluster_consensus <- droplevels(object@clusters) + # } + # # Make cluster names unique if necessary + # if (length(intersect(levels(cluster1), levels(cluster2))) > 0 | + # length(intersect(levels(cluster1), levels(cluster_consensus))) > 0 | + # length(intersect(levels(cluster2), levels(cluster_consensus))) > 0) { + # message("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") + # cluster1 <- mapvalues(cluster1, from = levels(cluster1), + # to = paste("1", levels(cluster1), sep = "-")) + # cluster2 <- mapvalues(cluster2, from = levels(cluster2), + # to = paste("2", levels(cluster2), sep = "-")) + # } + # cluster1 <- cluster1[intersect(names(cluster1), names(cluster_consensus))] + # cluster2 <- cluster2[intersect(names(cluster2), names(cluster_consensus))] + # + # # set node order + # if (identical(node.order, "auto")) { + # tab.1 <- table(cluster1, cluster_consensus[names(cluster1)]) + # tab.1 <- sweep(tab.1, 1, rowSums(tab.1), "/") + # tab.2 <- table(cluster2, cluster_consensus[names(cluster2)]) + # tab.2 <- sweep(tab.2, 1, rowSums(tab.2), "/") + # whichmax.1 <- apply(tab.1, 1, which.max) + # whichmax.2 <- apply(tab.2, 1, which.max) + # ord.1 <- order(whichmax.1) + # ord.2 <- order(whichmax.2) + # cluster1 <- factor(cluster1, levels = levels(cluster1)[ord.1]) + # cluster2 <- factor(cluster2, levels = levels(cluster2)[ord.2]) + # } else { + # if (is.list(node.order)) { + # cluster1 <- factor(cluster1, levels = levels(cluster1)[node.order[[1]]]) + # cluster_consensus <- factor(cluster_consensus, + # levels = levels(cluster_consensus)[node.order[[2]]]) + # cluster2 <- factor(cluster2, levels = levels(cluster2)[node.order[[3]]]) + # } + # } + # cluster1 <- cluster1[!is.na(cluster1)] + # cluster2 <- cluster2[!is.na(cluster2)] + # nodes1 <- levels(cluster1)[table(cluster1) > 0] + # nodes2 <- levels(cluster2)[table(cluster2) > 0] + # nodes_middle <- levels(cluster_consensus)[table(cluster_consensus) > 0] + # node_Xs <- c( + # rep(1, length(nodes1)), rep(2, length(nodes_middle)), + # rep(3, length(nodes2)) + # ) + # + # # first set of edges + # edge_list <- list() + # for (i in 1:length(nodes1)) { + # temp <- list() + # i_cells <- names(cluster1)[cluster1 == nodes1[i]] + # for (j in 1:length(nodes_middle)) { + # if (length(which(cluster_consensus[i_cells] == nodes_middle[j])) / length(i_cells) > min.frac & + # length(which(cluster_consensus[i_cells] == nodes_middle[j])) > min.cells) { + # temp[[nodes_middle[j]]] <- sum(cluster_consensus[i_cells] == + # nodes_middle[j]) / length(cluster1) + # } + # } + # edge_list[[nodes1[i]]] <- temp + # } + # # second set of edges + # cluster3 <- cluster_consensus[names(cluster2)] + # for (i in 1:length(nodes_middle)) { + # temp <- list() + # i_cells <- names(cluster3)[cluster3 == nodes_middle[i]] + # for (j in 1:length(nodes2)) { + # j_cells <- names(cluster2)[cluster2 == nodes2[j]] + # if (length(which(cluster_consensus[j_cells] == nodes_middle[i])) / length(j_cells) > min.frac & + # length(which(cluster_consensus[j_cells] == nodes_middle[i])) > min.cells) { + # if (!is.na(sum(cluster2[i_cells] == nodes2[j]))) { + # temp[[nodes2[j]]] <- sum(cluster2[i_cells] == + # nodes2[j]) / length(cluster2) + # } + # } + # } + # edge_list[[nodes_middle[i]]] <- temp + # } + # # set cluster colors + # node_cols <- list() + # ggplotColors <- function(g) { + # d <- 360 / g + # h <- cumsum(c(15, rep(d, g - 1))) + # grDevices::hcl(h = h, c = 100, l = 65) + # } + # pal <- ggplotColors(length(nodes1)) + # for (i in 1:length(nodes1)) { + # node_cols[[nodes1[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # pal <- ggplotColors(length(nodes_middle)) + # for (i in 1:length(nodes_middle)) { + # node_cols[[nodes_middle[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # pal <- ggplotColors(length(nodes2)) + # for (i in 1:length(nodes2)) { + # node_cols[[nodes2[i]]] <- list(col = pal[i], textcex = label.cex, + # textcol = label.col, srt = lab.srt) + # } + # # create nodes and riverplot object + # nodes <- list(nodes1, nodes_middle, nodes2) + # node.limit <- max(unlist(lapply(nodes, length))) + # + # node_Ys <- lapply(1:length(nodes), function(i) { + # seq(1, node.limit, by = node.limit / length(nodes[[i]])) + # }) + # rp <- makeRiver(c(nodes1, nodes_middle, nodes2), edge_list, + # node_xpos = node_Xs, node_ypos = unlist(node_Ys), node_styles = node_cols + # ) # prevent normal riverplot output being printed to console - invisible(capture.output(riverplot(rp, - yscale = river.yscale, lty = river.lty, - node_margin = river.node_margin, usr = river.usr - ))) + # invisible(capture.output(riverplot(rp, + # yscale = river.yscale, lty = river.lty, + # node_margin = river.node_margin, usr = river.usr + # ))) } #' Plot cluster proportions by dataset @@ -5986,7 +5987,7 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, #' } plotClusterProportions <- function(object, return.plot = FALSE) { - + sample_names <- unlist(lapply(seq_along(object@H), function(i) { rep(names(object@H)[i], nrow(object@H[[i]])) })) @@ -6069,7 +6070,7 @@ plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "R for (cluster in levels(object@clusters)) { cluster.bars[[cluster]] <- colSums(row.scaled[names(object@clusters) [which(object@clusters == cluster)], ]) - + } cluster.bars <- Reduce(rbind, cluster.bars) if (is.null(col)) { @@ -6115,7 +6116,7 @@ plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "R #' @return List of shared and specific factors. First three elements are dataframes of dataset1- #' specific, shared, and dataset2-specific markers. Last two elements are tables indicating the #' number of factors in which marker appears. -#' +#' #' @importFrom stats wilcox.test #' #' @export @@ -6142,14 +6143,14 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh dataset2 = dataset2, do.plot = FALSE) } factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - + if (length(factors.use) < 2 && verbose) { message( "Warning: only ", length(factors.use), " factors passed the dataset specificity threshold." ) } - + Hs_scaled <- lapply(object@H, function(x) { scale(x, scale = TRUE, center = TRUE) }) @@ -6165,13 +6166,13 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh } } names(labels) <- names(object@H) - + V1_matrices <- list() V2_matrices <- list() W_matrices <- list() for (j in 1:length(factors.use)) { i <- factors.use[j] - + W <- t(object@W) V1 <- t(object@V[[dataset1]]) V2 <- t(object@V[[dataset2]]) @@ -6189,7 +6190,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh expr_mat = Reduce(cbind, object@sample.data[c(dataset1,dataset2)])[object@var.genes, c(labels[[dataset1]] == i, labels[[dataset2]] == i)] cell_label = rep(c(dataset1, dataset2), c(sum(labels[[dataset1]] == i), sum(labels[[dataset2]] == i))) wilcoxon_result = wilcoxauc(log(expr_mat + 1e-10), cell_label) - + } else { expr_mat = cbind(object@norm.data[[dataset1]][object@var.genes, labels[[dataset1]] == i], object@norm.data[[dataset2]][object@var.genes, labels[[dataset2]] == i]) @@ -6200,11 +6201,11 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh names(log2fc) = wilcoxon_result[wilcoxon_result$group == dataset1, ]$feature filtered_genes_V1 = wilcoxon_result[wilcoxon_result$logFC > log.fc.thresh & wilcoxon_result$pval < pval.thresh, ]$feature filtered_genes_V2 = wilcoxon_result[-wilcoxon_result$logFC > log.fc.thresh & wilcoxon_result$pval < pval.thresh, ]$feature - + W <- pmin(W + V1, W + V2) V1 <- V1[filtered_genes_V1, , drop = FALSE] V2 <- V2[filtered_genes_V2, , drop = FALSE] - + if (length(filtered_genes_V1) == 0) { top_genes_V1 <- character(0) } else { @@ -6222,7 +6223,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh top_genes_W <- row.names(W)[order(W[, i], decreasing = TRUE)[1:num.genes] ] top_genes_W <- top_genes_W[!is.na(top_genes_W)] top_genes_W <- top_genes_W[which(W[top_genes_W, i] > 0)] - + if (print.genes && verbose) { message("Factor ", i) message('Dataset 1') @@ -6232,7 +6233,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh message('Dataset 2') message(top_genes_V2) } - + pvals <- list() # order is V1, V2, W top_genes <- list(top_genes_V1, top_genes_V2, top_genes_W) for (k in 1:length(top_genes)) { @@ -6297,7 +6298,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE). #' #' @return Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. -#' +#' #' @import Matrix #' @importFrom methods new #' @importFrom utils packageVersion @@ -6375,7 +6376,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use } else { ident.use <- as.character(object@clusters) } - + if (maj_version < 3) { if (use.liger.genes) { new.seurat@var.genes <- var.genes @@ -6384,7 +6385,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use new.seurat@dr$tsne <- tsne.obj new.seurat@dr$inmf <- inmf.obj new.seurat <- Seurat::SetIdent(new.seurat, ident.use = ident.use) - + } else { if (use.liger.genes) { Seurat::VariableFeatures(new.seurat) <- var.genes @@ -6394,7 +6395,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use new.seurat[['inmf']] <- inmf.obj Seurat::Idents(new.seurat) <- ident.use } - + return(new.seurat) } @@ -6439,7 +6440,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use #' afterwards. (default FALSE) #' #' @return \code{liger} object. -#' +#' #' @import Matrix #' #' @export @@ -6467,7 +6468,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project call. = FALSE ) } - + # Remind to set combined.seurat if ((typeof(objects) != "list") & (!combined.seurat)) { stop("Please pass a list of objects or set combined.seurat = TRUE") @@ -6485,7 +6486,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project version <- version[1] } } - + # Only a single seurat object expected if combined.seurat if (combined.seurat) { if ((is.null(meta.var)) & (is.null(assays.use))) { @@ -6516,7 +6517,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project }) names(raw.data) <- assays.use } - + if (version > 2) { var.genes <- Seurat::VariableFeatures(objects) idents <- Seurat::Idents(objects) @@ -6566,7 +6567,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project }) # tsne coords not very meaningful for separate objects tsne.coords <- NULL - + if (version > 2) { var.genes <- Reduce(union, lapply(objects, function(x) { Seurat::VariableFeatures(x) @@ -6610,7 +6611,7 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project var.genes <- var.genes[rowSums(new.liger@raw.data[[i]][var.genes, ]) > 0] var.genes <- var.genes[!is.na(var.genes)] } - + new.liger@var.genes <- var.genes } if (use.idents) { @@ -6711,7 +6712,7 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi if (ncol(a@cell.data) < ncol(object@cell.data)) { a@cell.data <- droplevels(data.frame(object@cell.data[cell.names, ])) } - + a@W <- object@W a@V <- object@V a@var.genes <- object@var.genes @@ -6732,7 +6733,7 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi #' @param ... Additional parameters passed on to createLiger. #' #' @return \code{liger} object with rearranged raw.data slot. -#' +#' #' @import Matrix #' #' @export @@ -6757,10 +6758,10 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o } orig.data <- object@cell.data colnames(orig.data)[colnames(orig.data) == "dataset"] <- new.label - + # make this less memory intensive for large datasets all.data <- MergeSparseDataAll(object@raw.data) - + new.raw <- lapply(levels(orig.data[[by.feature]]), function(x) { cells.keep <- rownames(orig.data)[which(orig.data[[by.feature]] == x)] all.data[, cells.keep] @@ -6769,7 +6770,7 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o rm(all.data) gc() new.object <- createLiger(raw.data = new.raw, ...) - + if (keep.meta) { cols.to.add <- setdiff(colnames(orig.data), colnames(new.object@cell.data)) cols.to.add <- cols.to.add[which(cols.to.add != by.feature)] @@ -6792,7 +6793,7 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return Updated \code{liger} object. -#' +#' #' @importFrom methods .hasSlot slot slotNames #' #' @export @@ -6810,7 +6811,7 @@ convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { slots_exist <- sapply(slots_new, function(x) { .hasSlot(object, x) }) - + slots <- slots_new[slots_exist] for (slotname in slots) { if (!(slotname %in% c('raw.data')) | (override.raw)) { @@ -6839,17 +6840,17 @@ convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { #' factorizations of the same dataset can be run with one rep if necessary. (default 1) #' @param rand.seed Random seed to allow reproducible results (default 1). #' @param print.obj Print objective function values after convergence (default FALSE). -#' @param vectorized.lamba Whether or not to expect a vectorized lambda parameter -#' ########################################################################## +#' @param vectorized.lamba Whether or not to expect a vectorized lambda parameter +#' @noRd optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e-10,rand.seed=1, print.obj = FALSE, vectorized.lambda = FALSE){ - + set.seed(seed =rand.seed) #Account for vectorized lambda - print('Performing Factorization using UINMF and unshared features') + print('Performing Factorization using UINMF and unshared features') if (vectorized.lambda == FALSE){ lambda = rep(lambda, length(names(object@raw.data))) } - + # Get a list of all the matrices mlist = list() xdim = list() @@ -6857,7 +6858,7 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- mlist[[i]] = t(object@scale.data[[i]]) xdim[[i]] = dim(mlist[[i]]) } - + #return what datasets have unshared features, and the dimensions of those unshared features u_dim <- c() max_feats = 0 @@ -6877,14 +6878,14 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- ############## For every set of additional features less than the maximum, append an additional zero matrix s.t. it matches the maximum for (i in 1:length(object@scale.data)){ if (i %in% unshared){ - mlist[[i]] <- rbind(mlist[[i]],object@scale.unshared.data[[i]]) + mlist[[i]] <- rbind(mlist[[i]],object@scale.unshared.data[[i]]) } #For the U matrix with the maximum amount of features, append the whole thing else { mlist[[i]] <- rbind(mlist[[i]]) } } - + X <- mlist ################# Create an 0 matrix the size of U for all U's, s.t. it can be stacked to W zero_matrix_u_full <- c() @@ -6895,14 +6896,14 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- zero_matrix_u_partial[[i]] <- matrix(0, nrow = u_dim[[i]][1], ncol = k) } } - + num_cells = c() for (i in 1:length(X)){ num_cells = c(num_cells, ncol(X[[i]])) } - + num_genes = length(object@var.genes) - + best_obj <- Inf for (i in 1:nrep){ print("Processing") @@ -6913,38 +6914,38 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- idX[[i]] = sample(1:num_cells[i], k) } V = list() - + #Establish V from only the RNA dimensions - + for (i in 1:length(X)){ V[[i]] = t(object@scale.data[[i]])[,idX[[i]]] } #Establish W from the shared gene dimensions - - W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) - + + W = matrix(abs(runif(num_genes * k, 0, 2)), num_genes, k) + H = list() - - #Initialize U + + #Initialize U U = list() for (i in 1:length(X)){ if (i %in% unshared){ U[[i]] = t(ulist[[i]])[,idX[[i]]] } } - + iter = 0 - total_time = 0 + total_time = 0 pb <- txtProgressBar(min = 0, max = max.iters, style = 3) sqrt_lambda = list() for (i in 1:length(X)){ sqrt_lambda[[i]]= sqrt(lambda[[i]]) } - ############################ Initial Training Objects - + ############################ Initial Training Objects + obj_train_approximation = 0 obj_train_penalty = 0 - + for (i in 1:length(X)){ H[[i]] = matrix(abs(runif(k * num_cells[i], 0, 2)), k, num_cells[i]) if (i %in% unshared){ @@ -6954,21 +6955,21 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- else { obj_train_approximation = obj_train_approximation + norm(X[[i]] - (W+ V[[i]]) %*% H[[i]],"F")^2 obj_train_penalty = obj_train_penalty + lambda[[i]]*norm(V[[i]]%*% H[[i]], "F")^2 - + } } obj_train = obj_train_approximation + obj_train_penalty - - ######################### Initialize Object Complete ########################### + + ######################### Initialize Object Complete ########################### ########################## Begin Updates######################################## delta = Inf objective_value_list = list() - - iter = 1 + + iter = 1 while(delta > thresh & iter <= max.iters){ iter_start_time = Sys.time() - - + + #H- Updates for (i in 1:length(X)){ if (!(i %in% unshared)){ @@ -6978,20 +6979,20 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- H[[i]] = solveNNLS(rbind(rbind(W,zero_matrix_u_partial[[i]]) + rbind((V[[i]]),U[[i]]), sqrt_lambda[[i]] * rbind(V[[i]],U[[i]])), rbind((X[[i]]), matrix(0, num_genes+ u_dim[[i]][1], xdim[[i]][2]))) } } - + #V - updates for (i in 1:length(X)){ V[[i]] = t(solveNNLS(rbind(t(H[[i]]), sqrt_lambda[[i]] * t(H[[i]])), rbind(t(X[[i]][0:num_genes,] - W %*% H[[i]]), matrix(0, num_cells[i], num_genes)))) } ################################################# Updating U################################## - + for (i in 1:length(X)){ if (i %in% unshared){ U[[i]] = t(solveNNLS(rbind(t(H[[i]]),sqrt_lambda[[i]]* t(H[[i]])), rbind(t(X[[i]][(num_genes+1):(u_dim[[i]][1]+num_genes), ]),t(zero_matrix_u_full[[i]])))) } } - - + + ############################################################################################## ################################################# Updating W ################################# H_t_stack = c() @@ -7003,19 +7004,19 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- diff_stack_w = rbind(diff_stack_w,t(X[[i]][0:num_genes,] - V[[i]] %*% H[[i]])) } W = t(solveNNLS(H_t_stack, diff_stack_w)) - - ############################################################################################ + + ############################################################################################ iter_end_time = Sys.time() iter_time = as.numeric(difftime(iter_end_time, iter_start_time, units = "secs")) total_time = total_time + iter_time - + #Updating training object obj_train_prev = obj_train obj_train_approximation = 0 obj_train_penalty = 0 - - - + + + for (i in 1:length(X)){ if (i %in% unshared){ obj_train_approximation = obj_train_approximation + norm(X[[i]] - (rbind(W,zero_matrix_u_partial[[i]]) + rbind(V[[i]],U[[i]])) %*% H[[i]],"F")^2 @@ -7024,10 +7025,10 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- else { obj_train_approximation = obj_train_approximation + norm(X[[i]] - (W+ V[[i]]) %*% H[[i]],"F")^2 obj_train_penalty = obj_train_penalty + lambda[[i]]*norm(V[[i]]%*% H[[i]], "F")^2 - + } } - + obj_train = obj_train_approximation + obj_train_penalty delta = abs(obj_train_prev-obj_train)/mean(c(obj_train_prev,obj_train)) iter = iter + 1 @@ -7041,13 +7042,13 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- V_m <- V U_m <- U best_obj <- obj_train - best_seed <- current + best_seed <- current } } - + rownames(W_m) = rownames(X[[1]][0:xdim[[i]][1],]) colnames(W_m) = NULL - + for (i in 1:length(X)){ if (i %in% unshared){ rownames(U_m[[i]]) = rownames(X[[i]][(num_genes+1):(u_dim[[i]][1]+num_genes), ]) @@ -7056,8 +7057,8 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- rownames(V_m[[i]]) = rownames(X[[i]][0:xdim[[i]][1],]) colnames(V_m[[i]]) = NULL colnames(H_m[[i]]) = colnames(X[[i]]) - } - + } + ################################## Returns Results Section ######################################################### object@W <- t(W_m) for (i in 1:length(X)){ @@ -7076,13 +7077,13 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- if (print.obj) { cat("\n", "Objective:", best_obj, "\n") } - + rel_cells = list() for (i in 1:length(X)){ rel_cells <- c(rel_cells, rownames(object@scale.data[[i]])) } rel_cells <- unlist(rel_cells) - + object@cell.data <- object@cell.data[rel_cells,] cat("\n", "Best results with seed ", best_seed, ".\n", sep = "") return (object) @@ -7092,13 +7093,13 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- #' Calculate loadings for each factor #' -#' Calculates the contribution of each factor of W,V, and U to the reconstruction. +#' Calculates the contribution of each factor of W,V, and U to the reconstruction. #' #' @param object \code{liger} object. Should call quantileNorm before calling. #' @return A dataframe, such that each column represents the contribution of a specific matrix (W, V_1, V_2, etc. ) #' @export calcNormLoadings = function(object) { - H_norm = object@H.norm + H_norm = object@H.norm W_norm = object@W V_norm = object@V U_norm = object@U @@ -7119,7 +7120,7 @@ calcNormLoadings = function(object) { hw = hi %*% wi forb_hw = norm(hw, type = "F")/dim(W_norm)[[2]] w_loadings = append(w_loadings, forb_hw) - + ###### Calculate V for (j in 1:length(object@raw.data)){ temp_v = t(as.matrix(V_norm[[j]][i,])) @@ -7138,12 +7139,12 @@ calcNormLoadings = function(object) { } } } - + ################# Format the return object w_loadings = unlist(w_loadings) factors = 1:dim(object@H.norm)[[2]] results = data.frame(factors, w_loadings) - + # For all V for (j in 1:length(object@raw.data)){ results = cbind(results, unlist(v_loadings[[j]])) @@ -7159,6 +7160,6 @@ calcNormLoadings = function(object) { } } } - + return(results) -} \ No newline at end of file +} diff --git a/man/liger-class.Rd b/man/liger-class.Rd index 980ab98f..f9810c9c 100644 --- a/man/liger-class.Rd +++ b/man/liger-class.Rd @@ -23,8 +23,9 @@ The key slots used in the liger object are described below. \item{\code{scale.data}}{List of scaled matrices (cells by genes)} -\item{\code{sample.data}}{List of sampled matrices (gene by cells) -#' @slot scale.unshared.data List of scaled matrices of unshared features} +\item{\code{sample.data}}{List of sampled matrices (gene by cells)} + +\item{\code{scale.unshared.data}}{List of scaled matrices of unshared features} \item{\code{h5file.info}}{List of HDF5-related information for each input dataset. Paths to raw data, indices, indptr, barcodes, genes and the pipeline through which the HDF5 file is formated (10X, AnnData, etc), @@ -34,8 +35,9 @@ type of sampled data (raw, normalized or scaled).} cells across all datasets)} \item{\code{var.genes}}{Subset of informative genes shared across datasets to be used in matrix -factorization -#' @slot var.unshared.features Highly variable unshared features selected from each dataset} +factorization} + +\item{\code{var.unshared.features}}{Highly variable unshared features selected from each dataset} \item{\code{H}}{Cell loading factors (one matrix per dataset, dimensions cells by k)} diff --git a/man/linkGenesAndPeaks.Rd b/man/linkGenesAndPeaks.Rd index f09687b0..1850e263 100644 --- a/man/linkGenesAndPeaks.Rd +++ b/man/linkGenesAndPeaks.Rd @@ -43,7 +43,7 @@ Evaluate the relationships between pairs of genes and peaks based on specified d \examples{ \dontrun{ # some gene counts matrix: gmat.small -# some peak counts matrix: pmat.small +# some peak counts matrix: pmat.small regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", alpha = 0.05, path_to_coords = 'some_path') } diff --git a/man/makeInteractTrack.Rd b/man/makeInteractTrack.Rd index 96faaeaa..f94b6ddd 100644 --- a/man/makeInteractTrack.Rd +++ b/man/makeInteractTrack.Rd @@ -25,7 +25,7 @@ Export the predicted gene-pair interactions calculated by upstream function 'lin } \examples{ \dontrun{ -# some gene-peak correlation matrix: regent +# some gene-peak correlation matrix: regent makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') } } diff --git a/man/makeRiverplot.Rd b/man/makeRiverplot.Rd index 4b1b0b3e..556e8827 100644 --- a/man/makeRiverplot.Rd +++ b/man/makeRiverplot.Rd @@ -57,7 +57,7 @@ between the nodes (default 0.1).} By default will try to automatically order them appropriately.} } \value{ -A riverplot object +NULL for now. Could be back if CRAN dependency riverplot is back. } \description{ Creates a riverplot to show how separate cluster assignments from two datasets map onto a diff --git a/man/online_iNMF.Rd b/man/online_iNMF.Rd index ccb0f1e4..73b5ef1b 100644 --- a/man/online_iNMF.Rd +++ b/man/online_iNMF.Rd @@ -81,7 +81,7 @@ across datasets. The V matrices represent the dataset-specific components of the \examples{ \dontrun{ # Requires preprocessed liger object -# Get factorization using 20 factors and mini-batch of 5000 cells +# Get factorization using 20 factors and mini-batch of 5000 cells # (default setting, can be adjusted for ideal results) ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) } diff --git a/man/optimizeALS.Rd b/man/optimizeALS.Rd index 64438eaf..89b826f8 100644 --- a/man/optimizeALS.Rd +++ b/man/optimizeALS.Rd @@ -19,7 +19,6 @@ optimizeALS(object, ...) W.init = NULL, V.init = NULL, use.unshared = FALSE, - lamda.u = NULL, rand.seed = 1, print.obj = FALSE, verbose = TRUE, @@ -72,6 +71,8 @@ factorizations of the same dataset can be run with one rep if necessary. (defaul \item{V.init}{Initial values to use for V matrices (default NULL)} +\item{use.unshared}{Whether to run UANLS method to integrate datasets with previously identified unshared variable genes. Have to run selectGenes with unshared = TRUE and scaleNotCenter it. (default FALSE).} + \item{rand.seed}{Random seed to allow reproducible results (default 1).} \item{print.obj}{Print objective function values after convergence (default FALSE).} @@ -95,7 +96,7 @@ across datasets. The V matrices represent the dataset-specific components of the \examples{ \dontrun{ # Requires preprocessed liger object (only for objected not based on HDF5 files) -# Get factorization using 20 factors and mini-batch of 5000 cells +# Get factorization using 20 factors and mini-batch of 5000 cells # (default setting, can be adjusted for ideal results) ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) } diff --git a/man/optimize_UANLS.Rd b/man/optimize_UANLS.Rd deleted file mode 100644 index 7e2973e5..00000000 --- a/man/optimize_UANLS.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rliger.R -\name{optimize_UANLS} -\alias{optimize_UANLS} -\title{Perform iNMF on scaled datasets, and include unshared, scaled and normalized, features} -\usage{ -optimize_UANLS( - object, - k = 30, - lambda = 5, - max.iters = 30, - nrep = 1, - thresh = 1e-10, - rand.seed = 1, - print.obj = FALSE, - vectorized.lambda = FALSE -) -} -\arguments{ -\item{object}{\code{liger} object. Should normalize, select genes, and scale before calling.} - -\item{k}{Inner dimension of factorization (number of factors).} - -\item{lambda}{The lambda penalty. Default 5} - -\item{max.iters}{Maximum number of block coordinate descent iterations to perform (default 30).} - -\item{nrep}{Number of restarts to perform (iNMF objective function is non-convex, so taking the -best objective from multiple successive initializations is recommended). For easier -reproducibility, this increments the random seed by 1 for each consecutive restart, so future -factorizations of the same dataset can be run with one rep if necessary. (default 1)} - -\item{thresh}{Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh. -(default 1e-6)} - -\item{rand.seed}{Random seed to allow reproducible results (default 1).} - -\item{print.obj}{Print objective function values after convergence (default FALSE).} - -\item{vectorized.lamba}{Whether or not to expect a vectorized lambda parameter -##########################################################################} -} -\description{ -Perform iNMF on scaled datasets, and include unshared, scaled and normalized, features -} diff --git a/man/plotByDatasetAndCluster.Rd b/man/plotByDatasetAndCluster.Rd index 778d0cf1..0435b638 100644 --- a/man/plotByDatasetAndCluster.Rd +++ b/man/plotByDatasetAndCluster.Rd @@ -53,7 +53,7 @@ one after the other (default TRUE).} \item{legend.fonts.size}{Controls the font size of the legend.} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ diff --git a/man/plotGene.Rd b/man/plotGene.Rd index 5f382b48..2c03f1a9 100644 --- a/man/plotGene.Rd +++ b/man/plotGene.Rd @@ -87,7 +87,7 @@ mismatches in order (default NULL).} \item{keep.scale}{Maintain min/max color scale across all plots when using plot.by (default FALSE)} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd index 5ed95105..c44795de 100644 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -75,7 +75,7 @@ NULL to revert to default gradient scaling. (default 0.1)} \item{verbose}{Print progress bar/messages (TRUE by default)} -\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if +\item{raster}{Rasterization of points (default NULL). Automatically convert to raster format if there are over 100,000 cells to plot.} } \value{ diff --git a/man/readSubset.Rd b/man/readSubset.Rd index f0462a11..d3d573ad 100644 --- a/man/readSubset.Rd +++ b/man/readSubset.Rd @@ -47,7 +47,7 @@ This function assumes that the cell barcodes are unique across all datasets. \examples{ \dontrun{ # Only for online liger object (based on HDF5 files) -# Example: sample a total amount of 5000 cells from norm.data for downstream analysis +# Example: sample a total amount of 5000 cells from norm.data for downstream analysis ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) } } diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index e63718dd..55b6d479 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -54,12 +54,12 @@ Selected genes are plotted in green. (default FALSE)} \item{chunk}{size of chunks in hdf5 file. (default 1000)} +\item{unshared}{Whether to consider unshared features (Default FALSE)} + \item{unshared.datasets}{A list of the datasets to consider unshared features for, i.e. list(2), to use the second dataset} \item{unshared.thresh}{A list of threshold values to apply to each unshared dataset. If only one value is provided, it will apply to all unshared datasets. If a list is provided, it must match the length of the unshared datasets submitted.} - -\item{unshared.features}{Whether to consider unshared features} } \value{ \code{liger} object with var.genes slot set. diff --git a/src/Makevars b/src/Makevars index 5485e26a..cd3a3c52 100755 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,5 @@ -## With R 3.1.0 or later, you can uncomment the following line to tell R to +## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (where available) ## ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider @@ -9,11 +9,9 @@ ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP ## support within Armadillo prefers / requires it ## -## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 +## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 -CXX_STD = CXX11 - -PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP -PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 +PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP +PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/Makevars.win b/src/Makevars.win index e7c87ad5..405fadd3 100755 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,5 @@ -## With R 3.1.0 or later, you can uncomment the following line to tell R to +## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (where available) ## ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider @@ -9,10 +9,8 @@ ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP ## support within Armadillo prefers / requires it ## -## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 +## Ensure support for 64bit int sparse matrices with -DARMA_64BIT_WORD=1 -CXX_STD = CXX11 - -PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP -PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ No newline at end of file +PKG_CXXFLAGS = -DARMA_DONT_USE_OPENMP +PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 +PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index f51c32b4..6c8630b1 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -8,6 +8,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // RunModularityClusteringCpp IntegerVector RunModularityClusteringCpp(Eigen::SparseMatrix SNN, int modularityFunction, double resolution, int algorithm, int nRandomStarts, int nIterations, int randomSeed, bool printOutput, std::string edgefilename); RcppExport SEXP _rliger_RunModularityClusteringCpp(SEXP SNNSEXP, SEXP modularityFunctionSEXP, SEXP resolutionSEXP, SEXP algorithmSEXP, SEXP nRandomStartsSEXP, SEXP nIterationsSEXP, SEXP randomSeedSEXP, SEXP printOutputSEXP, SEXP edgefilenameSEXP) { From beafd7152ec4835ad4cc68c1da2e561219c9e154 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Thu, 2 Nov 2023 16:16:16 -0400 Subject: [PATCH 2/8] forgot to change maintainer --- .Rbuildignore | 5 ++++- DESCRIPTION | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 9d81df18..605e7a90 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,4 +5,7 @@ ^vignettes/walkthrough_pbmc\.Rmd$ ^docs$ ^vignettes/Integrating_scRNA_and_scATAC_data\.Rmd$ -^vignettes/Integrating_multi_scRNA_data\.Rmd$ \ No newline at end of file +^vignettes/Integrating_multi_scRNA_data\.Rmd$ +^.*\.rds$ +^docs + diff --git a/DESCRIPTION b/DESCRIPTION index 233cfe94..bebf14fe 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Author: Joshua Welch [aut, ctb], Paul Hoffman [ctb], Ilya Korsunsky [ctb], Robert Lee [ctb] -Maintainer: Chao Gao +Maintainer: Yichen Wang BugReports: https://github.com/welch-lab/liger/issues URL: https://github.com/welch-lab/liger License: GPL-3 | file LICENSE From ff24fe6892a4c65c30af991e17bc351fe1945c6c Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Sat, 4 Nov 2023 13:17:52 -0400 Subject: [PATCH 3/8] fix date field --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bebf14fe..0fb2423d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rliger Version: 1.0.1 -Date: 2021-03-09 +Date: 2023-11-02 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. From a20d64fe9d2380588dfbe1421575b3fc9c2a3564 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Sat, 4 Nov 2023 13:19:33 -0400 Subject: [PATCH 4/8] clean up build tarball --- .Rbuildignore | 21 ++++++++++++++++----- README.md | 6 +++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 605e7a90..3de928b2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,11 +1,22 @@ +^\.lintr$ +^\.vscode ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ +^travis_setup\.sh$ ^_config\.yml$ -^vignettes/walkthrough_pbmc\.Rmd$ -^docs$ +^appveyor\.yml$ +^vignettes/.*html$ +^vignettes/Integrating_multi_scRNA_data\.rmd$ ^vignettes/Integrating_scRNA_and_scATAC_data\.Rmd$ -^vignettes/Integrating_multi_scRNA_data\.Rmd$ -^.*\.rds$ +^vignettes/Parameter_selection\.Rmd$ +^vignettes/SNAREseq_walkthrough\.Rmd$ +^vignettes/STARmap_dropviz_vig\.Rmd$ +^vignettes/UINMF_vignette\.Rmd$ +^vignettes/online_iNMF_tutorial\.Rmd$ +^vignettes/pbmc_alignment\.zip$ +^vignettes/walkthrough_pbmc\.Rmd$ +^vignettes/walkthrough_pbmc\.pdf$ +^vignettes/cross_species_vig\.Rmd$ ^docs - +^devdata diff --git a/README.md b/README.md index 40da63df..c050eec8 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ LIGER (installed as `rliger` ) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. -Check out our [Cell paper](https://www.cell.com/cell/fulltext/S0092-8674%2819%2930504-5) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our [study](https://portals.broadinstitute.org/single_cell/study/SCP466) on the +Check out our [Cell paper](https://doi.org/10.1016/j.cell.2019.05.006) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our [study](https://portals.broadinstitute.org/single_cell/study/SCP466) on the Single Cell Portal. LIGER can be used to compare and contrast experimental datasets in a variety of contexts, for instance: @@ -72,8 +72,8 @@ Before setting up the `rliger` package, users should have R version 3.4.0 or hig LIGER is written in R and is also available on the Comprehensive R Archive Network (CRAN). Note that the package name is `rliger` to avoid a naming conflict with an unrelated package. To install the version on CRAN, follow these instructions: -1. Install [R](https://www.r-project.org/) (>= 3.4) -2. Install [Rstudio](https://rstudio.com/products/rstudio/download/) (recommended) +1. Install [R](https://www.r-project.org/) (>= 3.6) +2. Install [Rstudio](https://posit.co/download/rstudio-desktop/) (recommended) 3. Type the following R command: ``` install.packages('rliger') From 19aabb9f1845b059c4359a2fcaceefb7e83749c8 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Sat, 4 Nov 2023 13:26:18 -0400 Subject: [PATCH 5/8] make runnable examples, not completed yet; fix wilcoxon: small pseudo-count to scale factor plus log1p; fix getMitoProportion: option for human and mouse --- R/data.R | 10 + R/rliger.R | 480 +++++++++++++++++++----------------------- data/ctrl.rda | Bin 0 -> 27820 bytes data/stim.rda | Bin 0 -> 28836 bytes man/liger-demodata.Rd | 28 +++ man/read10X.Rd | 2 +- man/selectGenes.Rd | 2 +- 7 files changed, 255 insertions(+), 267 deletions(-) create mode 100644 R/data.R create mode 100644 data/ctrl.rda create mode 100644 data/stim.rda create mode 100644 man/liger-demodata.Rd diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..1587ec46 --- /dev/null +++ b/R/data.R @@ -0,0 +1,10 @@ +#' dgCMatrix object of PBMC subsample data with Control and Stimulated datasets +#' @format \code{dgCMatrix} object of gene expression matrix from PBMC study, +#' named by "ctrl" and "stim". +#' @source https://www.nature.com/articles/nbt.4042 +#' @references Hyun Min Kang and et. al., Nature Biotechnology, 2018 +#' @rdname liger-demodata +"ctrl" + +#' @rdname liger-demodata +"stim" diff --git a/R/rliger.R b/R/rliger.R index 292c9356..a77633ab 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -1,5 +1,7 @@ -#' @importFrom Matrix colSums rowSums t +#' @import Matrix #' @importFrom grDevices dev.off pdf +#' @import hdf5r +#' @importFrom Rcpp evalCpp NULL #' The LIGER Class @@ -45,9 +47,7 @@ NULL #' @rdname liger-class #' @aliases liger-class #' @exportClass liger -#' @importFrom Rcpp evalCpp #' @useDynLib rliger - liger <- methods::setClass( "liger", slots = c( @@ -130,9 +130,8 @@ setMethod( #' @param verbose Print messages (TRUE by default) #' #' @return List of merged matrices across data types (returns sparse matrix if only one data type -#' detected), or nested list of matrices organized by sample if merge=F. +#' detected), or nested list of matrices organized by sample if merge = FALSE. #' -#' @import Matrix #' @importFrom utils read.delim read.table #' #' @export @@ -145,7 +144,6 @@ setMethod( #' dges1 <- read10X(list(sample.dir1, sample.dir2), c("sample1", "sample2"), min.umis = 50) #' ligerex <- createLiger(expr = dges1[["Gene Expression"]], custom = dges1[["CUSTOM"]]) #' } - read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, min.umis = 0, use.filtered = FALSE, reference = NULL, data.type = "rna", verbose = TRUE) { datalist <- list() @@ -318,11 +316,7 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m #' @param indptr.name Path to the pointers stored in HDF5 file. #' @param genes.name Path to the gene names stored in HDF5 file. #' @param barcodes.name Path to the barcodes stored in HDF5 file. -#' #' @return Directly generates newly merged hdf5 file. -#' -#' @import hdf5r -#' #' @export #' @examples #' \dontrun{ @@ -332,7 +326,6 @@ read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, m #' # name for output HDF5 file: "merged.h5" #' mergeH5(list("library1.h5","library2.h5"), c("lib1","lib2"), "merged.h5") #' } - mergeH5 <- function(file.list, library.names, new.filename, @@ -406,11 +399,7 @@ mergeH5 <- function(file.list, #' #' @param object \code{liger} object. #' @param file.path List of paths to hdf5 files. -#' #' @return \code{liger} object with restored links. -#' -#' @import hdf5r -#' #' @export #' @examples #' \dontrun{ @@ -487,20 +476,10 @@ restoreOnlineLiger <- function(object, file.path = NULL) { #' @param genes.name Path to the gene names stored in HDF5 file. #' @param barcodes.name Path to the barcodes stored in HDF5 file. #' @param verbose Print messages (TRUE by default) -#' #' @return \code{liger} object with raw.data slot set. -#' -#' @import Matrix -#' @import hdf5r -#' #' @export #' @examples -#' # Demonstration using matrices with randomly generated numbers -#' Y <- matrix(runif(5000,0,2), 10,500) -#' Z <- matrix(runif(5000,0,2), 10,500) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) - - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) createLiger <- function(raw.data, take.gene.union = FALSE, remove.missing = TRUE, @@ -682,19 +661,11 @@ safe_h5_create = function(object, idx, dataset_name, dims, mode="double", chunk_ #' expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any #' dataset) (default TRUE). #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with norm.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' # Demonstration using matrices with randomly generated numbers -#' Y <- matrix(runif(5000,0,2), 10,500) -#' Z <- matrix(runif(5000,0,2), 10,500) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) - normalize <- function(object, chunk = 1000, format.type = "10X", @@ -811,11 +782,7 @@ normalize <- function(object, #' Should call normalize and selectGenes before calling. #' @param chunk size of chunks in hdf5 file. (default 1000) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with scale.data slot set. -#' -#' @import hdf5r - calcGeneVars = function (object, chunk = 1000, verbose = TRUE) { hdf5_files = names(object@raw.data) @@ -905,27 +872,18 @@ calcGeneVars = function (object, chunk = 1000, verbose = TRUE) #' @param unshared.thresh A list of threshold values to apply to each unshared dataset. If only one value is provided, it will apply to all unshared #' datasets. If a list is provided, it must match the length of the unshared datasets submitted. #' @return \code{liger} object with var.genes slot set. -#' -#' @import hdf5r #' @importFrom stats optimize #' @importFrom graphics abline plot points title #' @importFrom stats qnorm #' #' @export #' @examples -#' \dontrun{ -#' # Given datasets Y and Z -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) -#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) -#' # select a smaller subset of genes -#' ligerex <- selectGenes(ligerex, var.thresh = 0.3) -#' } - selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes = NULL, tol = 0.0001, datasets.use = 1:length(object@raw.data), combine = "union", - capitalize = FALSE, do.plot = FALSE, cex.use = 0.3, chunk=1000, unshared = F, unshared.datasets = NULL, unshared.thresh = NULL) + capitalize = FALSE, do.plot = FALSE, cex.use = 0.3, chunk=1000, unshared = FALSE, unshared.datasets = NULL, unshared.thresh = NULL) { if (class(object@raw.data[[1]])[1] == "H5File") { if (!object@raw.data[[1]]$exists("gene_vars")) { @@ -1080,7 +1038,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes object@var.genes <- genes.use } # Only for unshared Features - if (unshared == T) { + if (isTRUE(unshared)) { ind.thresh = c() # If only one threshold is provided, apply to all unshared datasets if(length(unshared.thresh == 1)){ @@ -1088,7 +1046,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes } else{ # If thresholds are provided for every dataset, use the respective threshold for each datatset if (length(unshared.thresh) != length(unshared.datasets)) { warning("The number of thresholds does not match the number of datasets; Please provide either a single threshold value or a value for each unshared dataset.", - immediate. = T) + immediate. = TRUE) } names(unshared.thresh) = unshared.datasets for (i in unshared.datasets){ @@ -1154,22 +1112,13 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes #' (default TRUE). #' @param chunk size of chunks in hdf5 file. (default 1000) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with scale.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Given datasets Y and Z -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) -#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' } - scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose = TRUE) { if (class(object@raw.data[[1]])[1] == "H5File") { hdf5_files = names(object@raw.data) @@ -1263,7 +1212,7 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose object@scale.unshared.data[[i]] <- scaleNotCenterFast(t(object@norm.data[[i]][object@var.unshared.features[[i]],])) object@scale.unshared.data[[i]] <- as.matrix(object@scale.unshared.data[[i]]) } else { - object@scale.unshared.data[[i]] <- scale(t(object@norm.data[[i]][object@var.unshared.features[[i]], ]), center = F, scale = T) + object@scale.unshared.data[[i]] <- scale(t(object@norm.data[[i]][object@var.unshared.features[[i]], ]), center = FALSE, scale = TRUE) } #names(object@scale.unshared.data) <- names(object@norm.data) object@scale.unshared.data[[i]][is.na(object@scale.unshared.data[[i]])] <- 0 @@ -1291,11 +1240,12 @@ scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose #' #' @export #' @examples -#' \dontrun{ -#' # liger object: ligerex -#' ligerex <- removeMissingObs(ligerex) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (any(rowSums(ctrl) == 0) || any(rowSums(stim) == 0)) { +#' # example datasets do not have missing data, thus put in a condition +#' # Though the function will return unchanged object if no missing found +#' ligerex <- removeMissingObs(ligerex) #' } - removeMissingObs <- function(object, slot.use = "raw.data", use.cols = TRUE, verbose = TRUE) { filter.data <- slot(object, slot.use) removed <- ifelse(((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE)) | @@ -1352,7 +1302,7 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed set.seed(seed) if(is.null(datasets.use)) { - datasets.use = names(object@H) + datasets.use = names(object@raw.data) if (verbose) { message(datasets.use) } @@ -1423,19 +1373,14 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed #' @param genes.use samples from only the specified genes. Default is NULL (all genes) #' @param rand.seed for reproducibility (default 1). #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return \code{liger} object with sample.data slot set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Only for online liger object (based on HDF5 files) -#' # Example: sample a total amount of 5000 cells from norm.data for downstream analysis -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (length(ligerex@H) > 0) { +#' # Downsampling is calculated basing on factorization result +#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 100) #' } - readSubset <- function(object, slot.use = "norm.data", balance = NULL, @@ -1604,12 +1549,11 @@ readSubset <- function(object, if (verbose) { setTxtProgressBar(pb, i) } + object@sample.data[[i]] = data.subset_i } if (verbose){ cat("\n") } - object@sample.data[[i]] = data.subset_i - object@h5file.info[[i]][["sample.data.type"]] = slot.use } names(object@sample.data) = names(object@raw.data) return(object) @@ -1657,18 +1601,17 @@ readSubset <- function(object, #' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with H, W, V, A and B slots set. -#' -#' @import hdf5r -#' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object -#' # Get factorization using 20 factors and mini-batch of 5000 cells -#' # (default setting, can be adjusted for ideal results) -#' ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' if (length(ligerex@h5file.info) > 0) { +#' # This function only works for HDF5 based liger object +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # `miniBatch_size` has to be no larger than the number of cells in the smallest dataset +#' ligerex <- online_iNMF(ligerex, miniBatch_size = 100) #' } - online_iNMF <- function(object, X_new = NULL, projection = FALSE, @@ -2164,13 +2107,12 @@ nonneg <- function(x, eps = 1e-16) { #' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object (only for objected not based on HDF5 files) -#' # Get factorization using 20 factors and mini-batch of 5000 cells -#' # (default setting, can be adjusted for ideal results) -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) optimizeALS <- function( object, ... @@ -2181,10 +2123,8 @@ optimizeALS <- function( #' @rdname optimizeALS #' @importFrom stats runif #' @importFrom utils setTxtProgressBar txtProgressBar -#' #' @export #' @method optimizeALS list -#' optimizeALS.list <- function( object, k, @@ -2412,7 +2352,7 @@ optimizeALS.liger <- function( ... ) { - if (use.unshared == FALSE){ + if (isFALSE(use.unshared)){ object <- removeMissingObs( object = object, slot.use = 'scale.data', @@ -2445,7 +2385,7 @@ optimizeALS.liger <- function( object@parameters$lambda <- lambda return(object) } - if(use.unshared == TRUE){ + if(isTRUE(use.unshared)){ object <- optimize_UANLS(object = object, k = k, lambda = lambda, @@ -2479,11 +2419,16 @@ optimizeALS.liger <- function( #' #' @export #' @examples -#' \dontrun{ -#' # decide to run with k = 15 instead (keeping old lambda the same) -#' ligerex <- optimizeNewK(ligerex, k.new = 15) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' k <- 5 +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = k, max.iters = 2) +#' if (k != 5) { +#' ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 2) #' } - optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { if (is.null(lambda)) { @@ -2588,19 +2533,30 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters #' #' @export #' @examples -#' \dontrun{ -#' # Given preprocessed liger object: ligerex (contains two datasets Y and Z) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -#' # acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets -#' new_data <- list(Y_set = Y_new, Z_set = Z_new) -#' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example test time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' # Suppose we have new data, namingly Y_new and Z_new from the same cell type. +#' # Add it to existing datasets. +#' new_data <- list(Y_set = ctrl, Z_set = stim) +#' # 2 iters do not lead to converge, it's for minimal test time +#' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, +#' which.datasets = list('ctrl', 'stim'), +#' max.iters = 2) #' # acquire new data from different cell type (X), we'll just add another dataset -#' # it's probably most similar to y_set -#' ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), -#' add.to.existing = FALSE) +#' # it's probably most similar to ctrl +#' X <- ctrl +#' # 2 iters do not lead to converge, it's for minimal test time +#' ligerex3 <- optimizeNewData(ligerex, new.data = list(x_set = X), +#' which.datasets = list('ctrl'), +#' add.to.existing = FALSE, +#' max.iters = 2) #' } - optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = TRUE, lambda = NULL, thresh = 1e-4, max.iters = 100, verbose = TRUE) { if (is.null(lambda)) { @@ -2700,12 +2656,22 @@ optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = #' #' @export #' @examples -#' \dontrun{ -#' # now want to look at only subset of data -#' # Requires a vector of cell names from data 1 and a vector of cell names from data 2 -#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example run time, not converging. +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' # Preparing subset with random sampling. +#' # Subset can also be obtained with prior knowledge from metadata. +#' cell_names_1 <- sample(rownames(ligerex@H[[1]]), 20) +#' cell_names_2 <- sample(rownames(ligerex@H[[2]]), 20) +#' +#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2), +#' max.iters = 2) #' } - optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, lambda = NULL, thresh = 1e-4, max.iters = 100, datasets.scale = NULL) { if (is.null(lambda)) { @@ -2727,7 +2693,8 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la object@raw.data <- lapply(1:length(object@raw.data), function(i) { object@raw.data[[i]][, cell.subset[[i]]] }) - object@cell.data <- droplevels(object@cell.data[cell.subset, ]) + all.cell.subset <- Reduce(c, cell.subset) + object@cell.data <- droplevels(object@cell.data[all.cell.subset, ]) for (i in 1:length(object@norm.data)) { object@norm.data[[i]] <- object@norm.data[[i]][, cell.subset[[i]]] if (names(object@norm.data)[i] %in% datasets.scale) { @@ -2735,7 +2702,7 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la scale = TRUE, center = FALSE) object@scale.data[[i]][is.na(object@scale.data[[i]])] <- 0 } else { - object@scale.data[[i]] <- t(object@norm.data[[i]][object@var.genes, ]) + object@scale.data[[i]] <- as.matrix(t(object@norm.data[[i]][object@var.genes, ])) } } @@ -2764,11 +2731,17 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la #' #' @export #' @examples -#' \dontrun{ +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ +#' # Assume we are performing the factorization +#' # Specification for minimal example run time, not converging. +#' ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 2) #' # decide to run with lambda = 15 instead (keeping k the same) -#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) +#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 2) #' } - optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { k <- ncol(object@H[[1]]) H <- object@H @@ -2812,23 +2785,23 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' or dataframe used to produce ggplot object. Raw data is matrix of alignment values for each #' lambda value tested (each column represents a different rep for nrep).(default FALSE) #' @param verbose Print progress bar/messages (TRUE by default) -#' #' @return Matrix of results if indicated or ggplot object. Plots alignment vs. lambda to console. -#' #' @import doParallel #' @import parallel #' @importFrom foreach foreach #' @importFrom foreach "%dopar%" #' @importFrom ggplot2 ggplot aes geom_point geom_line guides guide_legend labs theme theme_classic -#' #' @export #' @examples -#' \dontrun{ -#' # Requires preprocessed liger object +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' \donttest{ #' # examine plot for most appropriate lambda, use multiple cores for faster results +#' # This will take a long time #' suggestLambda(ligerex, k = 20, num.cores = 4) #' } - suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -2961,12 +2934,15 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core #' #' @export #' @examples +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) #' \dontrun{ -#' # Requires preprocessed liger object #' # examine plot for most appropriate k, use multiple cores for faster results +#' # This will take a long time #' suggestK(ligerex, num.cores = 4) #' } - suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, max.iters = 100, num.cores = 1, rand.seed = 1, gen.new = FALSE, nrep = 1, plot.log2 = TRUE, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -3096,21 +3072,15 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, #' @param ... Arguments passed to other methods #' #' @return \code{liger} object with 'H.norm' and 'clusters' slot set. -#' #' @importFrom stats approxfun -#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # do basic quantile alignment +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) #' ligerex <- quantile_norm(ligerex) -#' # higher resolution for more clusters (note that SNF is conserved) -#' ligerex <- quantile_norm(ligerex, resolution = 1.2) -#' # change knn_k for more fine-grained local clustering -#' ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) -#' } - quantile_norm <- function( object, ... @@ -3213,7 +3183,6 @@ quantile_norm.list <- function( #' @rdname quantile_norm #' @export #' @method quantile_norm liger -#' quantile_norm.liger <- function( object, quantiles = 50, @@ -3273,17 +3242,16 @@ quantile_norm.liger <- function( #' @param random.seed Seed of the random number generator. (default 1) #' @param verbose Print messages (TRUE by default) #' @param dims.use Indices of factors to use for Louvain clustering (default 1:ncol(H[[1]])). -#' #' @return \code{liger} object with refined 'clusters' slot set. -#' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' ligerex <- louvainCluster(ligerex, resulotion = 0.3) -#' } -#' - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex, resolution = 0.3) louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps = 0.1, nRandomStarts = 10, nIterations = 100, random.seed = 1, verbose = TRUE, dims.use = NULL) { output_path <- paste0('edge_', sub('\\s', '_', Sys.time()), '.txt') @@ -3399,13 +3367,13 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = FALSE) #' @export #' @examples #' \dontrun{ +#' # Only runable for ATAC dataset. See tutorial on GitHub. #' # ligerex (liger object), factorization complete #' # impute every dataset other than the reference dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) #' # impute only z_set dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", queries = list("z_set"), knn_k = 50) #' } - imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, norm = TRUE, scale = FALSE, verbose = TRUE) { if (verbose) { cat("NOTE: This function will discard the raw data previously stored in the liger object and", @@ -3453,7 +3421,7 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor # creating a (reference cell numbers X query cell numbers) weights matrix for knn weights and unit weights nn.k <- get.knnx(object@H.norm[reference_cells, ], object@H.norm[query_cells, ], k = knn_k, algorithm = "CR") weights <- Matrix(0, nrow = ncol(object@raw.data[[reference]]), ncol = nrow(nn.k$nn.index), sparse = TRUE) - if (weight == TRUE){ # for weighted situation + if (isTRUE(weight)){ # for weighted situation # find nearest neighbors for query cell in normed ref datasets for (n in 1:nrow(nn.k$nn.index)) { # record ref-query cell-cell distances weights[nn.k$nn.index[n, ], n] <- exp(-nn.k$nn.dist[n, ]) / sum(exp(-nn.k$nn.dist[n, ])) @@ -3510,25 +3478,24 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") -#' wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) -#' # HDF5 input -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete -#' # Need to sample cells before implementing Wilcoxon test -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) -#' de_genes <- runWilcoxon(ligerex, compare.method = "clusters") +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex, resolution = 0.3) +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "datasets", data.use = c(1, 2)) +#' if (length(ligerex@h5file.info) > 0) { +#' # For HDF5 based object +#' # Need to sample cells and read into memory before running Wilcoxon test +#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) +#' wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") #' } - -runWilcoxon <- function(object, data.use = "all", compare.method) { +runWilcoxon <- function(object, data.use = "all", compare.method = c("clusters", "datasets")) { # check parameter inputs - if (missing(compare.method)) { - stop("Parameter *compare.method* cannot be empty!") - } - if (compare.method != "datasets" & compare.method != "clusters") { - stop("Parameter *compare.method* should be either *clusters* or *datasets*.") - } + compare.method <- match.arg(compare.method) if (compare.method == "datasets") { if (length(names(object@norm.data)) < 2) { stop("Should have at least TWO inputs to compare between datasets") @@ -3596,10 +3563,10 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { if (len > 100000) { message("Calculating Large-scale Input...") results <- Reduce(rbind, lapply(suppressWarnings(split(seq(len), seq(len / 100000))), function(index) { - wilcoxauc(log(feature_matrix[index, ] + 1e-10), clusters) + wilcoxauc(log1p(1e6*feature_matrix[index, ]), clusters) })) } else { - results <- wilcoxauc(log(feature_matrix + 1e-10), clusters) + results <- wilcoxauc(log1p(1e6*feature_matrix), clusters) } } @@ -3612,7 +3579,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { message("Note: Skip Cluster ", cluster, " since it has only ONE data source.") return() } - return(wilcoxauc(log(sub_matrix + 1e-10), sub_label)) + return(wilcoxauc(log1p(1e6*sub_matrix), sub_label)) })) } return(results) @@ -3641,12 +3608,12 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { #' @export #' @examples #' \dontrun{ +#' # Only runable for ATAC datasets, see tutorial on GitHub #' # some gene counts matrix: gmat.small #' # some peak counts matrix: pmat.small #' regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", #' alpha = 0.05, path_to_coords = 'some_path') #' } - linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist = "spearman", alpha = 0.05, path_to_coords, verbose = TRUE) { ## check dependency @@ -3783,10 +3750,10 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist #' @export #' @examples #' \dontrun{ +#' # Only runable for ATAC datasets, see tutorial on GitHub #' # some gene-peak correlation matrix: regent #' makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') #' } - makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) { # get genomic coordinates if (missing(path_to_coords)) { @@ -3895,12 +3862,13 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' wilcox.results <- runGSEA(ligerex) -#' wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) -#' } -#' +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' result <- runGSEA(ligerex) runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gene_sets = c()) { if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) { stop("Package \"org.Hs.eg.db\" needed for this function to work. Please install it by command:\n", @@ -4010,16 +3978,13 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantile_norm(ligerex) -#' # get tsne.coords for normalized data +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- runTSNE(ligerex) -#' # get tsne.coords for raw factor loadings -#' ligerex <- runTSNE(ligerex, use.raw = TRUE) -#' } - runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), use.pca = FALSE, perplexity = 30, theta = 0.5, method = "Rtsne", fitsne.path = NULL, rand.seed = 42) { @@ -4091,15 +4056,13 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' @export #' @examples #' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantileAlignSNF(ligerex) -#' # get tsne.coords for normalized data +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- runUMAP(ligerex) -#' # get tsne.coords for raw factor loadings -#' ligerex <- runUMAP(ligerex, use.raw = TRUE) -#' } - runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { set.seed(rand.seed) @@ -4148,13 +4111,13 @@ runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantile_norm(ligerex) -#' dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' calcDatasetSpecificity(ligerex) calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do.plot = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] @@ -4215,17 +4178,14 @@ calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do. #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' # generate H.norm by quantile normalizig factor loadings -#' ligerex <- quantile_norm(ligerex) -#' agreement <- calcAgreement(ligerex, dr.method = "NMF") -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) -#' agreement <- calcAgreement(ligerex, dr.method = "NMF") -#' } - +#' agreement <- calcAgreement(ligerex) calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.aligned = TRUE, rand.seed = 42, by.dataset = FALSE) { # if (!requireNamespace("NNLM", quietly = TRUE) & dr.method == "NMF") { @@ -4346,12 +4306,14 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object ), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' alignment <- calcAlignment(ligerex) -#' } - +#' agreement <- calcAlignment(ligerex) calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cells.comp = NULL, clusters.use = NULL, by.cell = FALSE, by.dataset = FALSE) { if (is.null(cells.use)) { @@ -4443,7 +4405,7 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel #' #' Returns alignment for each cluster in analysiss (see documentation for calcAlignment). #' -#' @param object \code{liger} object. Should call quantileAlignSNF before calling. +#' @param object \code{liger} object. Should call quantile_norm before calling. #' @param rand.seed Random seed for reproducibility (default 1). #' @param k Number of nearest neighbors in calculating alignment (see calcAlignment for default). #' Can pass in single value or vector with same length as number of clusters. @@ -4455,13 +4417,14 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # get alignment for each cluster -#' alignment_per_cluster <- calcAlignmentPerCluster(ligerex) -#' } - +#' agreement <- calcAlignmentPerCluster(ligerex) calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset = FALSE) { clusters <- levels(object@clusters) if (typeof(k) == "double") { @@ -4503,20 +4466,14 @@ calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -#' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -#' names(cluster2) <- colnames(ligerex@raw.data[[2]]) -#' # get ARI for first clustering -#' ari1 <- calcARI(ligerex, cluster1) -#' # get ARI for second clustering -#' ari2 <- calcARI(ligerex, cluster2) -#' } - +#' agreement <- calcARI(ligerex, ligerex@clusters) calcARI <- function(object, clusters.compare, verbose = TRUE) { if (length(clusters.compare) < length(object@clusters) && verbose) { message("Calculating ARI for subset of all cells") @@ -4540,20 +4497,14 @@ calcARI <- function(object, clusters.compare, verbose = TRUE) { #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Specification for minimal example run time, not converging +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -#' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -#' names(cluster2) <- colnames(ligerex@raw.data[[2]]) -#' # get ARI for first clustering -#' ari1 <- calcPurity(ligerex, cluster1) -#' # get ARI for second clustering -#' ari2 <- calcPurity(ligerex, cluster2) -#' } - +#' agreement <- calcARI(ligerex, ligerex@clusters) calcPurity <- function(object, classes.compare, verbose = TRUE) { if (length(classes.compare) < length(object@clusters) && verbose) { print("Calculating purity for subset of full cells") @@ -4570,19 +4521,18 @@ calcPurity <- function(object, classes.compare, verbose = TRUE) { #' #' @param object \code{liger} object. #' @param use.norm Whether to use cell normalized data in calculating contribution (default FALSE). -#' +#' @param species Whether the data is from mouse or human? (default "mouse"). #' @return Named vector containing proportion of mitochondrial contribution for each cell. #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) -#' } - -getProportionMito <- function(object, use.norm = FALSE) { +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex@cell.data$mito <- getProportionMito(ligerex, species = "human") +getProportionMito <- function(object, use.norm = FALSE, species = c("mouse", "human")) { + species <- match.arg(species) + ptrn <- switch(species, mouse = "^mt-", human = "^MT-") all.genes <- Reduce(union, lapply(object@raw.data, rownames)) - mito.genes <- grep(pattern = "^mt-", x = all.genes, value = TRUE) + mito.genes <- grep(pattern = ptrn, x = all.genes, value = TRUE) data.use <- object@raw.data if (use.norm) { data.use <- object@norm.data @@ -4664,7 +4614,7 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si tsne_df[['Dataset']] <- unlist(lapply(1:length(object@H), function(x) { rep(names(object@H)[x], nrow(object@H[[x]])) })) - if (reorder.idents == TRUE){ + if (isTRUE(reorder.idents)){ tsne_df$Dataset <- factor(tsne_df$Dataset, levels = new.order) } c_names <- names(object@clusters) @@ -4795,7 +4745,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit } dr_df$feature <- object@cell.data[, feature] if (is.null(discrete)) { - if (class(dr_df$feature) != "factor") { + if (!is.factor(dr_df$feature)) { discrete <- FALSE } else { discrete <- TRUE @@ -6750,7 +6700,7 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o if (!(by.feature %in% colnames(object@cell.data))) { stop("Please select existing feature in cell.data to reorganize by, or add it before calling.") } - if(class(object@cell.data[, by.feature]) != "factor"){ + if(!is.factor(object@cell.data[, by.feature])){ stop("Error: cell.data feature must be of class 'factor' to reorganize object. Please change column to factor and re-run reorganizeLiger") } if (!is.null(object@clusters)) { diff --git a/data/ctrl.rda b/data/ctrl.rda new file mode 100644 index 0000000000000000000000000000000000000000..da53bfdea8a0531b01f2e8bbde7b2c2179f695e7 GIT binary patch literal 27820 zcmag_V{j$R_XUi`6LVtQIx#1dpo;aD96Js*-d!PSXb*paG{czWZ z)w{cPuUfl5b$8X4Ft8Qi6xXNI)DLkf(1M}-^YefI|8ST~?m`OZ?&-J&1;q{ZAK;-V z8H=HyV8x(lrJ=qf@pQi?Gs#KJ>UK*K>};%dRv#>&ZKmLp=O zhsGdl#ES_H)$-$+WplT&0O3o6E8&Rj;ZjP98OwCc0C~{GO9Xhiz>#VYQ%DFJ0o+pY zP~7Mc5<|cT6cnmZ)QDeg={x=qO5y*RfPzA~=l_pM|KIrkmtYV4pkOb6P*9k`P;j5i z*EKFsPz$jXb-{Sh$*`K3P-1k!;zTeO+}UZ!jv{nv%47+ua=f{Da*j$l003|C)C)H^ zA<_R^?tj|Q#n5GvXgr*$L1}i}^J=j{!4x4dbh(Id$iCc5*=u=nP*Ao+ehE-ZcEw8w z!T(=oR_*_jKnO-sl}eFXNs=J4;;W6iOb;d!kqNf_zXSYF`CIi8H>?5!f6;$=X#ckY1+@fAS*EJF_Mc>~?b3g>{eQp>!^9(j6mC1Ms`-p; z%|n)}xn`Zi{YFDU4xa$C5z@L3?~64?TmmJrsaz4yo2HgTvM;?q7)x5d_S z2r=B_V^`yMv=kmjqIJ+26He<|tW*Exx&QlwXpcULdek!`GXDI;{OG-!FfHF__#N@~ZRz4B8X@RvwygM`du)J>tJ{K1d}EYSqx zI&`^opj2r55T#r=HjKgucIcauk`Ni%D{ltDy(TCY7eG-cd<344DnQSl*PLha$ZbVu z@k_tuSxdA4OSv0luwWg4RXZ1Di>I)365D3?EvBT$29tggScT$(- zPsduf4?D@1%OIX%Z}GnMCl;|^%0u;Xk(ooCRhkss`lCg_Kt;_BF2&J|YD_Dqkhi?i zeVb24p;cVvr+KPc-~OxE?Yq0l9`yaG!H-c*;|KG|GtO>hMTyeQ{ZvO~Tw!(MU`vQvQ%I|Wv5;U+e;=oe#$Yb-AoK9H10WC}YXel$u2pnz{?heT1!f(#__61nM&lVgg2BZ%XY>%uqVH#W**g!u5T$$2Aq z@qj3(@KVE0O_Qpr5HlI^;U7CMqWF?wEQxS6St=4EyNra%P&lE_NLlTiy1Ye}{u422 zQ_zB7-mZa}6a^?;N2g02COK>j=~isxMFn3m28xTju(gXGGIUG|w>1JY{M2DZ6t(l(d00zu3QSr=j20{cV9GIEB&=Uff9xltq zGpVOKBCo7NkLi$`AH+3oEX z#my+xYSOONRPFY`SoYjBW=@Q-N%m9Ay5PFFo_!1%JRyN}?mN_SF&boEz9c%qGAT_1 zgrQVbd|nPe92iE@NXqkGTg+!Vyf7vmif~(?`0{%2QDAIw45ChYogQYe9rTLcv@MhP ztRY(~Y3@_t5(lG?fx#H`ayzk@n=ZgH0}8fV-It5PBrcqK-%ej+wnVIN#aqIkk$_sU zB6Fvq_+)-WKiU0Od31M(+LE3Ln{@iPLLB|cbN9YLYCyW?r(?pCjvusc8M~U{c_}_y z`h2t94sxPw<9h}{ADjjz(814$(sh+#=f0ye!Y#3`HKgwbRNid+or3w!CR#h0dZr!R z`(#RnKDg5zZG8us0?b_s~)AH@Xf=2X@@M5&;<>* zOX?yuFw)uoaay%WhBv;RdNZYoj`ZjdN!0v?ed|;1nqt3rj&Bbba#29q;ngKXC*PXh z#BKK#AfSMnKG+58bM$rlySboX=ye>G1>GZ9ekw&H^Mq3VtI?bj{y!;;|9oPJ_YJrw8})EBO{R^|w{n@rl` zW1=)Bohz_`Qa&Nn$dqA%6e_AzslJ^eYE782BVA9VgcdVgX*o+y$Q^b{oX0S?UIzkC zrZj+C9_f2T{OkyCtl-_027v_(k)kmhfV6`vB2@jGD7p~gx|UkH;lSe>ADlMD_(~R) zteSH>YfDK# zKW5H%hiU@4U3|s6(@};PuI_DoSxYmy3kp1gV*#_MI0`hSLd>-uZ=I^Uic&bj;Y`5_ z{-^AfOfY6@69>b5j8H`oo0e&Ser^Q%6CgRSXp@PxV z`qkw@_eoZsoeR1bs~?d zkVti*g8Y{qZrw+8E+1LK>`yjvgZEcKQT(sLX_dd95|^Vkbsmd^vF@T!LGQY6h=*1@ zzyK*p0F37TMEz$e%E_}80BYBuS9D2dy9_4Ig!k7`p2_>+TubCZCbehA(zH_fT!esN zQ%bXCX-cG0iyB z+%r%9`2?zdkTfdic8NtjCVNPmB#Poir-N876t^6o#?$L551BR!pXi8zGx16)OPN4_ z!{&@L0TJFoN26l^o-X5U>y21C5x-P8?YCRyqsXDR@2aUnY!|Y=R(cBcoHWZtxtl#n zoN*(e9CDM+2)fvO;mW|fc}2pG+!V=L41eONu5Sf9P1A5) zmql$N(l7K1CK;x5@1NJ&76O#kTsNe6N_mCmGAZAhX(VrmJpm3?T}MSm#2&5lcivL3 zv$QyZ_+n;7+{s81Xactv8#dowejQ}k?5;yZQ$U1_u9%*yV}TVr=O_-g&{sj#`e{R( zItq-W$$hMWc;2yZ_7$fKYE--D!~g+!k8qv8J!eET8v&O?hqlf)w$AFlBIVOAmkxu- z;pH|3Ia|&Aa&IRd0S5=hboijcSwKX{T#6>Qv4jhT>bS4_8^-V;;fckG*ask|6$q?|-pfN-o5ZR^IqFW-77@EwYkzG2WGDa}LJ! zeXsR&GuvcFqYa4Mm$D({sfX|%?+(hZh3QYoAY2a)2S8&~udzl?y}9e~!l`0} zs5Y3Tg2RI=3)cbArVP4tfZ#L6)ENlePpys;Frus!6#%{^o5A@T?Utf(rKG*>LbCeD zKYfB>>s9N$yQ?e)-n|%TBqDLDTi@)GVhV8xofR^Y0#b^+HmR8`3zJ+f+}S)e^ezti z-IjQIQednu`YUvOdo1$G$I%|!6I#W~kXgmd8Qa(kkV;KlYak+p3L8g}vhdeB(o2H& z7Lz~fG*9`;erj&ttBKkKd5j%}E<8C4wvO@>>EX)Ph@Vh>7~MpgmjpgM4dT>yWH__* z@Ki$0Yg?`fx<@AKi7VNXH-J?1Me{lV?t5t5A2*8nVf6SmZCT@2 z#z>hlBBQ^XH<(7BtYzW!F^TkPLSCfI&Op%F5--=&GoaV}%Gc|b-f(33^QK4+4p}=s z1Q;%|OLH~ZA)_gKNSdPse)O$4WmI_FQf-Lq*XWjO^h;=ti{axq7Yd@-C%h-_GPDx6|{ zA@A9Dm%aQ1rb}69Y*V?G=pJWsDk#xz(4<##jni9!*TU8pF41Z!Ibp)$Foz1wmrsS7 zTBh4lu*t_MUR|5^>mqU z%q3lvkg*!nTy*JO;L3d)Nj^QcqYmJZXZ1wZuOODS)RBhvf#ys<(u1TbURfnXO&ss!XodU~s&sL72v zo_8K(WTH7d=W2c14d?1MO!z3rO!k@4x+(y0;k!ddqswUH|C3&YE* z#&E*;G2Z+xxjP+Q7Iw0Vo(F_og4^)vm)^cZJNT%pd7~8OXD1_78YGD~coDi{B$>%v zsCt4dzaPX?it|?G-dgJ!$jqmn){NrZ+=bb%HH1dIghnBVLHq?5-r%@0DPFybQKanM0!afsSJ$P4H$(#y zO;u(ltS6~T(nfMqOEKZ{2rcR3_qCd~CmjGisi-k1L&DXi5OGA#_uH4B{bnbrMgB>ADaR67gM|Voek-OYb=5Z#! z@!MJrDQlx_JQcM5eG@$PCN)?^VEp+*Mos|XOpC^d74{g-Fp8Ojiw=T4QL}eE6U!0I zpR92vHRs*R!;>H;6&CKGBO158kt~ZTCnId~H-~i2(y+bE_H+n9n>vGaJk`)@w$`{MtxxF^P+pqSgDk4=r0!2~2loIRqh<^vb2bE+x5 zDf&pQ4<$f0zK#&~A2L?Fysoyc1t{!upW90cP~uiCfKsBG`IQ^sBE5_)A{t|SIgE&% z!92sD(Zc>4JK3ElRvWL&a*Cej2v*AL9#VP4CYEZRh<{_(?1J>nKD8wMSOXf?$trZ{ zn8p+7QcZm2NqTVyz{Py&PIP()6$D0?P%q+V)iIIh9_(9hi(|&-%;**}-_4Mjuq{&T z>btpHevaFM%=5lTYqqTEaS*}TQ&YYU{0(Sh@X3@AtSj;g{}H_^7t$Y}iPkX+CU%=r z*^*2P6&GaA#&Dza$$ECs8jLO=xry?@S7e`ZpgXAD|K6YJViMV2Ehip|yC`SL2%;p; zZ|UXLG3MPSlh!*fkSJ2pZC!@LApE?Uv-TYkpyA~0%-?7l#ZufOAiZz>!+yb1tgC6P z6%SLFZ4rs8LdbFPaMp+o!`G`*VBv8&Mwd3c&B&6!tsm9Ux$b1LXV=~NDNhFuQxwlc zRcJh%6z?`4KDJHHZoJ9^3*~l!uP-WEQ?6lodh)DEfGqpS{L)`Nb27Ct>1;Az#2Hh< z4vRj%0nVs8ZInj1<=d)eYIP{LBwpB~9Gw<@S0bV0Xd?<7o1nZF^}q>$o}w(n#bY+o zzKX)+xVQ&Yp9oA{4R1GTNTJe4 zI+p@uggG}yU-0g6>`ns>P3ht!hr1EBUw+J&8?e2mdihvtkJ?qEjRcap#VW@+XywA( zPJrJzt$Y)47a{4C+$^cA_c+@`=^ZQ^0qtLDm)G#_VK4cS>IVjPuTKxDPr4Anhtt-X982!j-KbM0k{)hH@dAv~KO|bqd zdXmC#vVh%oin6d?)$AM_mmV4LdjQtLC=cX7ORO9bbmn2V+p+e>-R4XuUso~{c}y;+ zGIg^+`K4J)np$eCt$4S4S|25jg(k`fgquV#g2PFdouErZ-J{=5`!m#K3&z==cA7>T z6&-?P-;E7Nrq)Le%`yY2f+an8dN}g|WGs_6{;zSFn+o58Q{;?L+jX|4D;7iy`B{2< zlq(>;e2q9>1kF<%_u93H6OM+H+NmsKiis*XC8V*}`hN~LQiNcViNnsX_S{5^>jj>^|RM}PNucKQnaC}x#oODWVNJiyy}%H6OKq+_~ig^*Gt4-(k!W-NtjpsPmk z)yIt< zyZkNhX}TTJYcPnidr;MK zjbjNK;mX&aesw>UN42vY9I5PL8e}XG4H{I5Pvm8IP?&RyGPg*hAKzN8{9T;+d_|kM z-x-k<+$^C3#&^-omuSRjW(J)@Bw+KZ6>qG_{At)1Gt_|J#mzC{6MwMJg6`P z_uqbrc$?Ho$?*-$IDiolV_gmi*b@0dJ$d1Hw2+@wWeVS2tDA>R;>h*~xuyTT6qOLW zN=-VkYGqqU2Tao01x@mca?@V*vN_H@DyZoB=-oS=@ZuqxI{u{iE(11^A$zzXdpY7y z?A^Tuj{i^%-_plL;u+9z01~E@y|LCei6nI}L4c0g6Oq!jy4BZoX(<|@VEW-S7c8pw%Y`mNXAcL`Wosf4h68Q2pMU2;Awgc0aY3=D)CD)y$~N+F zftoO&vlqtxsxurdFJV_S0@3OqINe-diQ-9PEu&e{_lTp(Llw`s3cixNu2;s0jf9{> z&eh3&Wd9vuon7#3NluZvBD+Q+5b+FO|E0Q+c9~-#gXz?H7|@2+rO(#(UC+j0Nn%&r zK@Y$~C1tRdbmETWFK8Rl_vKvMpa0-s3MNMwWL3b!?%xzr&-u;|DE^zleo+Uy}Z%| z-&)o= z>y-Ikb*s3=y*tR=3Ev3C=7Y>{>;C2zNLk%?m#Lv8%kN4zSyDONRuarUR{9d_m~&tw zCZ;UL>LlSIQ!COb0uI9{mEC~-=y!?{MjLDDy{hHteD;#ZjyT1Aldd3Z&$7gsD{1j) zSNq4~C%b`h{g~`AfUt1=Uj5a4&x4~pO^8YWsr|N3`AzHT!g}w_0K7VT5ul7>|FJdR zddn$j&EX5ZxBMafex_j%ua8ZrCXD#IeGl!<&q1}f`*$Orx?lN*d%|>nWXGhDaBio_7|2#v?xQWz z=jON^?HWY@fA{O3{4b=ZI39nHLY6$>mp(4Sn`}iabzeP-v9-uiX3qze%N>50%>En# z*^4TgNfPdvd%SU;ew5^w-ORK8mil9~tS@&9`f})1UGNa8yNYGF^)_p@fkP||<@4Q1 zTfU07tXJ+l^@$@m4nHknvnCQBSK5Ec)T3QqvYD2jh%wlx+U^(^38Mj|IsipJxbYpoLgs$ z7$X+8&#C(^Ve##w#j6w z);6;LAw<3>v0Bblu$uBvERTXb2d1vPL~FT5{1e&F?@FZ)@9{;9_>fw8KigB8jdL_? z9PJ8*da~jyjSU%ZgX+CfO48Rq1;hn%;5wgw#fVuK4^%X4a`8-4W?ZCsb?{zu`^QUY zS|y2g4V1oX@H>0O-BFd*Z20Xdxc#&%Pln!DTw?AaM7f$S6TRq)X$NQJqzw!#;7m~S z38JKem>;H}rK8JUR1ai@aNdh3S9Vr1&rH@Vh_}@apF@t8vHJk5knyZ22o))fS5V>i zWY@MY#yFuegs4%l0P@FjIDhOk2fWPPA7>vuz6b8RpCl@C-n7yjHSnldSrvHx&wnyI z*AsknH!Vf^w>Mm1L(>`;ZAJp+IozK_txtb=5~^}JbGZ!kA9ZT`5^8H-AVvQ1-+MYS*%xi6%LjM1-)8j1coZ8 zZ^vUMOJFW=!~ddzrlfhX-<6XVOWw`b?*X`lYM%IOw|7>VUD||5GjOwio5BRcK`xQ% zwvMv*Y&X-JUv5gKAq7jbbD~7NpnlNPukqGQezJhRf}aIc=(0V34>v{{Ly4P}}fvML$HgcXX!p z5_x{IX%5jxDN?idVlIb?IMizX=p@7>F&0eQl9#Y?>_N`BHfZvf&2(;INh%}r?j$eO zMVX(=8GUxtN$Z}8y_D)k%esmj|G*FJL7}%C|JR+Te#Ak$(yjN|wem%eeQQOEVM%vQ zgGHbe( z)_^vxH?*4^7grn3IE8nQ7NCTT)d#djx{6iJEEGRv_Ej5I_pfSA%A$KLWChMxl{7`s zY%{{5bgn#A_Msgl=SjSa-*+5tS256>;=oR%;kt}>Wrz2J@>0kqve76QZj+9#qtm+j z>qouQk3nkvuL4{g&}QFhAMEMl31!xu-rv0wjX6{2-RjV-JIU$wMJg^>{S7MxwL|CO z|NA3!8q-+uF6wtqbiaAD%OfNKuuEoOym%Bg95Q8N(3wO%m6(q6(J`yrN{cDJkMz{{ zNO0nv;NgRX%!ohD{;lz%+&~n) z0ZoZs_Fb^4s&*3lb1FO_V?UUSEmmzFY={L)^BVbx72{z|f3Ne^$8Pn(;N`37?6$FD z$e!w4pgzD`)V_$}FV`$CKg+^3=rt*Q)*gL1;=v1+`TmbPF|OS4?(Z2XBsLR~SPh1* z4!5nDw0M13U*z#RN0J~z3hcIxd>UuHqF@7FKkUrH*=P`nY%S9Q%<$rYZ$0~?g zy2y(6SWHV+Kd30hLi9cdP=ca-u+zer5j8VXXEgCeS9jHhhXc}j>#iJ%vMqRtEb>Tb zBq{Tq2G&>x%%&Y@DF~XlSLpt{9nGmAX{<6BBO^~E9_{4-e@vj4iVD^$@Jysb-oE|f zp%1%!`8R`GeGZ65K1na=4S2BP0~OW--}M`R2#fSwXqScZ>Jf`Osl3 zOC81AS<@e%>cA$m^2^E^j}v+=p6K$~sYT|ujzE|1MO&0Y!5&hJ;<8(Cg0N&4fsXeU zxa+LN=P75tX0=Ipr>H2mWC4`NhBJ%fc|w3bc`~)^gI)heF{Q&dW0klmK@_0gLbKBC;6%p-+f zu={$o8*on824}vt{9%968Tm>0iqESVawi9>r3_0J%Dl?OAz~0m(NS{_km5W}nRc7D zW-p1mZcPWCm~aWaYBy4kbJShi^XGN69~4V!z;2Csd91QaT=(URwKD5ez0I5N>*z{9 zAl5|{dbP;Qxw-G!M*P5_?q7^RYm-Uc12s^OsO$i&oV|$zR^5`{Op-1(cBWjS5YXqD7B z4mdNI0qkN!wMkP{1^L54SeFuh1RTGPO%N}`3A}7F!?wB|kK_(24cJxwRg>G7r0xUN z>7@QhkyZurJvDatfx zlp>HvRlD-$j7LE@K{iA^Dgb_{piHd^PqsbDX`FHOQyq3lOKw)k^7?d@rr!4!zM6{Y z?(sx0v)!LtEsVR0v(=Y>?pucK7}9n)_2kVO?J!IAoEqnL0NV5fvmktV4!(Y!5UqN2 zJazr;9fVV*{(ruE@!4<0npI*S7rB{laVc8~wM$uo%R{K{qcj%Cb<-bNCpIX?TXlCT zTLeF6so3guW+}_am9MSj7->H9nkV;zn(91_?-Nv7WvP{i?)+w>J>`Tf*!qRm4}J$w zQU;biVJxEiI@+lfO_G}iC0ShkmZ#FEEmVk*cru=13!RI{DabRTJdQYqPQ7jXav*8} zuHm6}`HW1l2LYBb9=I1L(y?urEYH|_WCWr`;a_wU)mqOKmz7c0WX;)v{EG7J(X^3A5bYLcnrsglS$11N zIEx%a;Wy9EztXCtPD{TE&a{2m4Lv5unU2_kLJp4D{P8!(f3%^C^GS1Q$!L7B9Z<-V z{Eik5NUx807%)G?_Njvp<99d4bG2Kb+r*Z0wwQJnm@MmBpAX2>0Jou}oC6g^M7`A* zL#omy$uSa4CetE%bV?rs86I_1`W;E;zTU@l>Olz+KjBQHFLCeyEfe%XAa}bKGR*|w zc)oq~M^-`H?BcCMp4V=AWY?UHx`M|4%-Ffpj0Q6KVx3hP*w!zZlZ5m%eWrKxBMv1+ZTR|kH z%*`$hW1k^iO{XdP1e>f-Vckm2=cBwcnRr1s=)%Gt7@_r-9%EWs0mUqOE+fFm#iev4jPYrA?<2v+>V&e(dtGn!qU)B0PS^2(~ zb@pm7AAX&|G5}3|6=3PJ6*w8IZqYGp&FuLqw2fkwXGxv5un6QZGUhvukz#sBA04n+ z(g`aw99$AhsUm7kxS$2LgPZWIboc#1<;sIG#Dy?z(D*PCKqixUmT3r3z+xmu zTG2(9)ui<}{VRY}xr4#;#-gKu&yp6G3 zx^p0ykR{CiEvN*+U%OZ#luqSpad(=_yZ)W}dXRx~lgQf-A~qt8Tt^?EG+@+K_IbJL z-PFqW`Vu9*HqiY1A91IvsU9;*wkp?EJPuL>hbhhx={Q}X3$SHfhs%x68*v<%M92cM zl{SCqtW#B?haaa=dYHrzk2I0Hv<+AssLhYp#GrfYV=<^{5*_gR+4jp|$JB&Nss7yYI$yt$zOLfiP!BNoQc)*`8O3IWnQRzHD<=6||F(WekKj z&6LG$RuD-_x%GEmda~Nc!-1vB?-*EI@vryqmOe32NzoT=r;WeoPa~8k=k3`X5T=js zSRj(H;kltKGY)DV>+`h<$dR3q-wUZQIv(5k1{gp(*Kw8SbY*_`&GzG)5eM~F{_~2W z_lzu}cKL(BVYH5Pd-1X~roCBuMX|9y6=t>Ko%*K?meF45-w)Akbb1eeZ%yQ0DqdC& z+I?BqJh>SW+(=ZJQUNsMkqk*ya5t)V0p}plv<5W|Q=i4BA{0HTu6O?UtS-Cx_Nb@) z%xSK~$jp~cKpBGPBc}hfu=1U3G{8qWGLUz^@Z);C%WA^@0arlI!ffm58U-?5qvz?- z6Tw^(n6f`0#q8%;TRmxc{;qTTgE{w2DG92@+Cjuka8hIIjO9x09Bn*}%{Lltk$c{{ z01ktkaw;Ph5-#j`90@y-OFJu1^qAA#OX>DacwBz*ndn+UaOiIo*)>El2`$S*K7{O} z-rcM|4cYy_M-HDjE#dOMYa<4105EtcMeC+_HAY|Il0F<+hGJn0_>#njKhH#st=jzS`S*oN5N=Z~^5e{xh*5S2B|MuMGJuXvb^3_Wmi_B9q z=b{T}5Mfa2V1l5&vNTze|4+SmA~9KK)=?gYRP)dgKvL{eA)Ey z6~X;Zk$KIcf)unkRjWpR!h2?{>QIJ0_Rktk-GUC6t8rc!zt3}@{Ky1uWpF`Apob?E z+KVhsR7b^&a^LmIrj01n2Y~!llB~gu&5QrpEMKRRO7T*@#eY^*&ifQa6bK95r=~4)=k@YG zd-;e&6oDn9%2MI4yjHq(O@-y7lMgtX# z2Dj9O|73qrk}aP)k#;mI-{JULwk0h7WV}t@*$L?5*F5LAT68+#bBmcA`q}lbDEq_o z5@`6g9$<==({Cq08`m6ZQDX24%mw>bN^RIjv zSESveg8I7+{Qnkh_AAS&>kN5+T>a)4NCFkZ1Vb_3#2p)O{*iP3I#WWqyb=~wZhH5s zbNLA@@*lw8*d`|r`0?^)%yt#H{qlhPgd}|*tcI+Su~y|ZA*6DBg=yDR9n~a2$Ww&4 zNV@!z^(&=2o^bZ$Gu`OD$fIg`^Jl{&5s`4jgD_}xM&%uSoV@9qkN)!S*56%7MH3sa zL!;N!?A^~1%XewtEOuj`W8n&94;hkGQP{#n`DB^{1Dbte@bTRA$m|^O4?w^(?_GS% z@t@nL;Z)sa$Eg^y+@+IlbhRkNQ%dDun{m0}vUp{p6CO63e~YgEh%qoQFe)G|h)KYI z|CY)+1FNQug{nxGkzpIUq)tVd!hpk&*KvE* z(Dbt}&Cg@5#h*=D7~Dg+*L}AOPtyywkcLIn1IAKK>^2dtMc|65K@xCVsfQ?XkTYUm z@2D^}ce<@#9YO>=2_u!^yK#=cW9yc?8Q9rJOGnEs6L8{6`91Cyy7}G7Z2*0E#(KL? zy$IUx#$CEt4733_c9vB%Fxf6AqV>_` zOdR9AT4gY0aIo@N$Qd8?3%YyNL$_J`9K3s`>(U*i7cyhX8PDGkx}Tx7 zz8Akoe~Elc>o`?9_WZN$*nHp9XJ-#&h(W8MMvk4E`|JBLZ%40@AMV*kEx1k17x~)+ zKo?ng$wg0Vnl@qaOfs8lRvlg19V;GE9&X+^Glo}=DZ3HwlMMfzL`f5$IpuIfHdW^V zkH0j&H6`-4&W#usK0}&ZH@VWWS7yyI63nT2dX{PtTp6pGh)j9<;7OQGNDE99Pd)7? zRIbB2+id~%ub=uRDtNo&?v2@a#ot+PGALc$3O~)x?-^Fs8>WHNylo6~2gM!bV+ZgE zskp$qu{*iX>%S}weY(IMCVlscVy33<$%4II)~DFb22)6>hX1wwV7b%LSjL1BRG!<` z`JVgc!W3*OSV>!?jt~=a+8dwy9yX<=tkIlbpz^?$%qzjPAVx~@0CO`u8HmEyqEDbx z(G5})RLb|D&%`iGP(z?@BD5-8J2$tk6j;-26BZnaSY@d33N}uwhfUF~_Qu3j^nGg_ z)QcI50g`IDX&U6Ixxx0x(TLfV7SkYI5CmU()|z*fH3WpP++7MmlK^p=b>uX5_rOtS zqypv6sBwoNsQ3k*y|?@wxY0Y7G85J71Js-vR>Zb-_j8UONh_~`b#|3)&H@|)ZHVV2 zYs5^`pNX2fdD3+R^5IxWv%Q?!%=K+-w|G6@ddAC7Gf6!?pCs~ms^o~6u;>YQR_%vk zVj)qOcW*ASjFe8pQY5EnO@;fpfS|9gYl+5;e#Mcc`SL34h0pSY zmOAj(>ruxxV$o3|(KLeD?wy(t?dZcg?;16_%6M}0%^gq~c!tl_&=eJf8Q}$-y>y4L zXJuMCTY`u5BlB3sftfOIMz$>T^5I0m%hk;mvC%XZX>HnWSi&@j+OQHDG!8zsr}dd- z$CkERYkJ#Cww;{nRR@0K#4^LBF+8(*(z)sqV~uw(*wb#uWpH%SYZ_eU3{#AO1KVH= z8YSBYh}f8dP+gn6qWrurOQ5b|HJ&+gP3!{dQ!M4=KCR$=#zjzw&Kk`vPArvbe%PYkDdgMNd>rtIyd#IW;CT zRY9z7qMMOMK4zgm%lGy?hXOV^^dxg^y_{FBh%zoNCzdGOjrY}r>C&TvWHa`mgs&^E z(+#CM_E3t-q9|%^Tu3hGz^&UA$@;sNHa=HZjgYmJQA+$pi1vcAU8AeO)Ncp5!$-Jq zj;oVKtYhJ6LV^9aKG(L&Ee~%V<&@forC<4eo--$x3vw?#0)Q$*)QY3|>(bpMB@5?3 zPso8Ol{JHTthMomO3995J%23o$NgPzE^75x%_XIjL z-%Z7W9D(CBjmmPbU>>4mrJCuIauKFM{5C9Q+gf(qyV?b{StWb1&o(H%@oRMk@1aoHxepbT?Xgx_i6nby%vPG<*$qcJ=zMTY2)l=WM=U zL_Kk#AkDRCiezZGvTllGz}lkT5eF*C$zEotW}k8F?KPBk&sb)tM@gAWiIy8CHlr#7 z$~EVppYyqUz#8whc<%j23wLL`V@{Bn+r7O#HuowoW4HS-2pr%>TK8@S7@CSeQwd!> zVb?|o(4rzpNHE?6;c?R@^tdrGDIOSeCX8e`eYy1Y-MY1i3kBON)u_A3oQ8`NrQvp_ z<2{_n(t8Y_Y(4c#J6XEB$0fTMC9)v{!}lVjy|n9L)S;zW|MN^3Q$Ic}9Eu``Qf-u) zUZLwSRe`*WpPF1-jRPPaIwACmchtG`Hc(=t$lrZn*Zj{P)S7tQH;kY-OpR1gB zSIslByZsBy|K&JOY~MAi9^D|_p3)-smF2O#jgR96?sYHVqJhDas}?+W2yt(`-Q;g9 z^A>WGpjb?cm4TD8O;wssxBSUvyMd09Tc1M_y(M9AIuoJ^KT6SMuBV^dGM~vYlwR=!xy+EiH}eawpym(vObk!YZ@0M2H9Z?p06`0%qvDnbXAVuTv?Ga@ z(O~!J&&w4;EI_+-HTp=xt9evChSQdFdsMdcT=pilU;sD%KRYzHCn;fMptaYme<;Ju z^;+K4e#kgw)>JXkjWX*^N2w!_Jcp(>L>*)xtIy3dRo|%~4KnPMd$aDu_mtw3uMctp zEwKG#DAN74TiQ3Pg~FN7&$lfS-uqfo2;$d^>~3FW#m4G5ofSEf(O5in<@}-cpZfRg z7>nOyPDjnUEv~$oD?C94W}}BL38Rb7ou9K;-#@%;|MW|4g?z>?_q}S0F-ya*TI{@B z4hZ&LJ#+YgX;_$zvW-U^XEyv5KxML2weiB0LuUvG94(+MJl)Rbe$gdHVad=%LnTYK z89>cOW{Stz;I2tq?N$R)y)UeifeH}6$&cNfioUfv_9*e&tr2R@168xe)JP`NYWkv z+K$_J4z8(K6cG9z?YaqGTz#0tVllt`3E^dp``REE+OeX+)Na?U?YXw06;$QgJeMJ3 z1S~7t(m>t0nqN)N&t$UAe7=0X-H~jib@I3I$F~<@{Lq_UA1cAyg=-a49IZ+EEGXHzh3!mI7w+BMhqpC}(W=`NEKOfps(~C4RbHD(e&4Hm^g`yuttD(s;V!`t zRxszS$fmDl>l3X?(^Pc*aM-tQ=A*MWb$Q96Qvm4RNcG}+*}^yks%eTO{a8%(ff^kz zHu0|AG3sgJ=Iw?cGZ#9VNF)@>sZt*bpqIfXxfn#Kcc>pdV&z>T5 z;`RCZT=dq1R^ZEP^=F`GyK;~dQTHX#YcC@vs{1jwsefAAcL}e}eQvDKLApmeZY1)9 zauU|Q9g6kvTt3L!_BMET=&#m_@pI8>)EZ}>vdXB<^&Yd8eZ4jQGT3N2f*W}#S6*`z zl_}Kr-@Erk*# zkZC7B(~U2mTf{UAq_NLvd@mi<@a|+VLtY_~!K#X4$dD?mSGP7y;j}uIFUMs(aZvNp zlFwE+nE-pEj`iudv`GHsw3-vFo?-bmo+o5n+?~I4JK+$s)X=4A>P6n!Me#WRtm#3& z&tKgUtHF5=-BH@U*EUHB$Rzgr1c;}RygL2Oe+o1v-B9+M>j=jSY}$t*#E)!agH8J@ zJ!LY zNSuaQ>EH0^>+pdKn$rfo;`t#@K(77l{%e@@2t<*&%f1n9EeMYko23tF_VPOsV{B}q zqpy)Kb5Z9o}IN9!_updb!^?l+7IEMeKDx!;+H!4N6UDrCod)EO!BmCK*$P$8^L&~Wmf(U zlDl&3MBuan`oNa2wPv5bq*pWF{VAwTHTHU(?O0v(=Ub=E z!z|D3;_DPzH|o!0nxewFpq8-O>n5%d+)9#zg1p8rb)!(-c7)yGnT1a`3I*3nPdsw{ z5wNZXyZ4=%;vu@elz0&F3RYrW=xxgWchjiCTG{<=rVP#b?2@*-)nZTq)Xg^IUt>rF zlEoig4(nJl@bKH;^3wO}H2>t<#^JRO2ZoExuTLG_?)a*!D7RGk-HR*0&aiVYy$cV_ zi;Hb{MtwNc_&e%C-d5_dLko?B?jVDam!o zt%cw%S4`J7{(cTky!t23=HB-I8vJPs+dt3plmGp+PCF{%{u2D##C}c=Y%W?t>$UQ|XWgA|S0=QWe_)xk@ZrszQ9|%3yAK*z|D>qTk0v<7 zEQv5Ld7pZu-TUJ#&}JkhREKWx$wB5BgDtPDAori3{C<9wqd%a)rXP!{{ipu(f&abX zWUH;TR+anxE_N5e%ii5c-E}kh2TCRRL}&M~ z^G3_Xbc^3Fb={3(4HW7>tXtkVM>zV^EI2uXV%nwnPnda5d49nB?=0wM0M4?&L{84Z znR}wHRt`6?#c z+G4v)(Z^%1ID_r&S$noCw9U^NRBmkwnhrf@{sZx-=R^9s`#*rmpMS%96xoccTD$MU zuWzF9&+7DO`z4LZUR_EMHk5NK;%w1*I$85$z(v&2@t)`7$K3Qz|BK#J-t@e;k z)*xLFw0J}HzOBNOTzMtR)P=}Lz=j2k*|)cIr;Xv(`FpoMbZ-5=jWuLy^Q-bm{Or{u zu0?xHj!6yS9n~fKZ{IO%;v;yD@1DHi`aR{A9MRu> z{R1#2V72_-v8|I+KaY~m*B*1!Z8H6S>9jnk=gdklOdjF%>sNA`j6txj9a}~>+2#~) z{>6yT#TRlq&-?mh)|Q>CAiInDLv$a|)0@=y)BTW_E|T}J}0Wm>7jpdT!1n! zz8A?m0&vxtyPlVPso7LoPfqpTk+SNQDr5ckdWuYSxEXX(#<}4mRPW^JXOdc{*1q4D zWOwHN32f6>JbP(oMqy|2)9?9mdT+n8A!=(srjP3@_%uXp`qX0v@pg9YTCb6JwEK71 zVwxE4D*^!F6Pv)pMXv~gX}$#K{3g#0wU zU^eRp`st@h)wElIMX!2qU6g!Rh3E->mR#k^dTu}Mp-R5yub35y@6eUq{dD|gFEO_J zeevF7yih%L74T^%`}edfU(5pEuGH9Xdh?Z$-*k9iW=kp>ZyTfOZU5jMO;x|*jA5S> zbpLfjxh(&85Bp3N(U-3iWt~I&${}_Sw}tmBD5RVkB(FE6VF%GRC?C$dmJ=w;6F*zEk3Vg zapD2*`DTmguyR=1QY&cvUHEockMSG$Q!Vy$A$v_%CSy3N?i%Duj>f+Ko-j99TgI?< zpXveI=N}B)d!__=Dj^JO6qHc*s^a6seX@#adZPuh^1CnHGO!9h0{?t{WpfveW z@r$(73$DzcpbycS6;29j1tG;RDhp|p<0yxwKF=uKB0vSg=|-Ac+6>7xi;mWWK3e#? zaQ)l*aR@Q{e!i}bzs|x^Bm$E7CZF=*8pd#51px3BFTU*0QEAHj*2K56aQT`!Anvr`oFl zW!62(yDNeJ7LtfyGn4yg7Pw_w3J!I6e)`9N-HEzP@~sGem0;EVbHtX=nA1Pv^>gms zC8BPetf!ry=T?+#xaSQ|Erkz%<62&J>Y70Ghg;;Ea)xiCff?UomA9N<`Y@}CwBNmv z!UuahheF*V7#Uw)d7AJ5hH`pRt60OmEo3lcKN=PL6d88u&qk{j#JHNP=lg(wSpp{9 zZM2}CkZpvDX-Cxe51}o*j)#{78#0WHeZui>=$0ZgMRy&d&1Q&+Hq_K1gP~UHO$?YI zwPoU(a&pSVmDs2I|24*bzxVBT*}~BsZHh{+=zrI>t6Td9jZ6h&;zcX#;i*c+4Hb;-T!3MnpEw=6#xUi;x#_0yx^{DZ;+(G3eJ ztG~t}N-0zJh+nX=%{BE3G97hM>lXuQ1gLfH856hd`_*e|j+`Z?I_5<_%ud=?wH7;@`ErdHvwh>{Pf7RkL1`Sa3J^%j zm?$`-@axx3b9BxvfE^;swBE#C&QoSf?AW2QILPqIMr*XWkGy%kiG!0|alJg}%Ic$N z$Kc7}n`7q}wu6JipkL_4Gt<;Mrq48`~jf@*R$liMn1#!(a> z{Q-u$Q^1oDQZt<5jW(iz`$GCc`rA-V9!Bb#+h|}FUyEpvRTK<}(4-p=YMB;pGuAVO z3i%EZlZa4-YJQ0(9i%vGNY0 zgA7pG2u-5y6JXYyxOz^+Mj>YgW~}@yjrDmoQwMOaBrQ|Rnwhz6^5`@Y;so408qA9c zMbNx)z_BRHwgAg?zg$$2EAp8&gy+g;=SB+SG;Am_D8B(_A-S%JXCFd_#;p+NAs&~g z5i{n;0lCJOO?-7!Oj01pHKISFN#Aro8e|U4nm~~I$sM8I<#cb9D*{Zx@(YmtNb`sZ zB(I4MRFI`dSw*>#`)M_%FwNXZHzE}18Y!eA`+VF{u1I|fRxm^gqKCChlYN;$RJ1W2 z1BRy~Nlu8u7Rh^uwwTgYE|R8An6-v!wiDCARs$41yu# z`M6c{Mk7t1;K2gv@teVY>bnzPDG)q%LJJ3o24QZ4Kp>e)IEkDb0nN;`z_iSJd^ z?%u}sT3&UhQa=Ba<+C}_Z)_#Pm*0;z{!JQ=Y5&)=SNybQ%U@9BwMLBrJ*92e4@GX* zx$bd&$ZOw_`_tF&J}`{FS3Q`M)EybS^qZ*^{TH{%Am^aJp8BJGw-}lG!vC&gOaCP) zU~Vv~u2mnof9-A(>)WZHyAOZ;+vq*Lk%T_1?76AC1}KzQSIo+G(42!qCUAv6dPQk) zEi#*WK9PTuJPsOQ6Eff=K+QySnQl(R6S_BMbDMg^)Kd;2jpyy@Gr$7^vW&?l7Tn>sBF%fC%(U_nkVwq)P zd9(;Aj@Z}%Dvm%TlbRvvI!y5kb@0Fm2PDkUHKvr!rG*BF1(U^dP)%Ap00G2HW}3)6 zUyG<5un8QBgsdb*Esto<3?VH^QKlvZR2$|f@n zn_(=A05}N<&h^HTXifo81(t<+G>C>AfD^aJdw7RXB6tx~5c0qix_y+sN5o_yk)#$u zEQG`Rv9ytr#E7O^xFv~>1ChurqS+>W=})RQ*Y#6S^b&30JEkbE)WO6= z*yE6kal#=7CLH_*W@h>~XSH%#kwgn;D`1gl1h1OUVVS~7Ibl>^Gti4ruxPL&0G`Hc z=YrWx*yet+imMdxB}kEQlFM$QmNXR(P$H(S4MOxOAoI{@Y58s%7KfKinW)vVpe)@1 z9t{B+Xah+HEM>ctra?mn`n;oHln`>=96Pp)6Bp4}LP2O7n;M|nqQM3LdRS8%kXTm_ zS27O}EFwti(IIml6b!(rG95?C3Du6n6ol~nCVbrBEGRdKkC0L;ifAgr(tuPf%e@%o zEfEmK%ia+dQ6`i9;C^rp@(GRN3*};`*oYhiA`i>2sw7hIn1+0IYo&RmgcBEo#|_(} zP$!t~9O-t)j$m_oB(La^mQ)=PI~Pr0qepRrJZc2n&#hk_Uxm#rK?-e1q^xH)G&;_h zWh#b}3X1}Wxy6A(H-Ku1BDvUv$;RUbC#572 zB;i20etcxVvkhWY(oeRM;){xXW`QY`JTtq^O2Wl~h$8Gjo{yM5KC~%LjMLZ3U4CXXZhx8%RHifiSRpGG%CUBThW5P=W!ht?KAQsSZ zY_6Z6pQ^;j5|99`()=0=Dx4gq`U$3lkSPw88eIh%(nyL%GOc2qdSS-G>CL?JiBoTU zEO2oe7BSJ%d*Y#}qT76|pxr1m2AqdtK}Y~3yW3Clz6dqqdyr$-Vpe8Wf)udXM%wD? zh+V=CiJ)Hp92+kYr32NYU2(%4xPV_AumVjZ*I066>7E4$?a+u$3CprYjYJCR^jXD} zZzGGSKy3u33`(YlBCw3!LJ?J(-*?xUHk`8x7#Lt$TZB|1VI**WGYsIZ9w5S_sJLNM z{aGPXEEb#NxJ)}vD1gRpEg-P*5pyuaIKR@^%h>`Yg#n70TV{nG9^ehXHqzEEAq@1B zLJ5rt)YCn6bOc2kY}0IjvfcgAV?J7r06m@nO_npL=8*c$6x}5AaOf)Y{)7CPANgO3pg4EV77uUoJ5%L zO>#A~NJvK^dE60#sYuF3N`U?}TudolMuWHIsK;>H*(hxgo3(3Bv~^0%g7?>uO;_Xa z7(i=jMw32=6PlDrYX1p9_ZB|L5#1WPQ;XMH!w3F}S>Dtki5($b6@U_%NNySz) zEOJ&Ri>J-vP#{rsReTdKq{YgQUmOMFl4yk2sM8!wRcO*hY7}f( zyo_Ymy@@i95;3F#Jtj<(EIuT$Wj5~ads^blgc7rGb}s0fWc*xbMP^7p;fUq40gm&2qlEgd`-oNVyCzI zG2zreVf<3}B#;{;nVAISfChNfXz8+4PjZQDEEVPLU6zDp2&dtu1HASkXMJ`Drc|IwNox zp$BWo2XfhilS%P_=u*l1*)XC=iDjk9*6?U8iQE1jOCAtRS;m3H0~`)M0KNi4@{6O9 zuTTsw)lWLI|NbatQ!$Ta%8tQK1mvxbM~JkwH;c(s@94Nvspfd2U41=R=BT7ZvD9fw zS5ib{!Pp9T9Eyq}uJy2U2_n+TwLl@EBO;IwF2Yo4KnH15gs%Zg$S*T9W=9jMrvy_X zDK_Kc`FU#yc3f2nml|aeEI@Y+bb9$2pAWzUN^R4hFZMli?W zkv2>iq4DD50YPr%0Gq4OOG3@RZ>$_3Z)3-G=+Y+pEh#iG(0mb8l{eM2%# zI0hq-niw-dBJBdHT|@?lu#y6WgR6l;3V{@-J@D}o$!MG^p~pl*rj|oUglUW8u{6AE z6Nx>Ypxvb=5ieJ97~SY3+bpp_+EK(M3x%9|K?}4ewyTGIQc}f9%d{Mp++Gz}fKSGS zCM6}Ed_R0ZvaNrl6F8ETAROy?l23DqvBs{|Nd&PLz#*YyHFAwp6~Cg`%4z4?k$S~r z5~nn|XinaHeo`WP)C-SQ%9QKJpnUP81`CzqTTt+TUD@VP6>a3F&5LS@& zBIw2FD43-9Vg1;L^*5iEALDLI3nU?2k|i3II@XX-0mc*?`wZoL@h>nW3o0xVgeBNr zio!Z`6n8+CV$o2Xp7qQ>2sPCil~-ioS}<$HiFa>i`b0UoyjL+%lyJ{+Oycg;F1-A` z9g>i}WAoSp`T6}B%gswPAed9>{GHw`w&`K4w8$|HMK4q7!ng`JP zU3!cXDnAbdJ@U~XeiNtqPZ$w>>FM&$T{+?}9_kZIAoVYiSl!}4Z|!9iBMXXP913Q$xy(L-~9WAMMZlv0&_XADKi6= zqrq<8N^YJ;GW9aYB36G~iXZz66geI^yJ;#=EfD(ttn|r{*7tFZ%%!dy*S=nR?P^=) zRe~Qm;AYnifyfyt$c;Wen)&<`WBt~iz+``@t0CEEj)v>(1l0M!;aiyiMYqd;*MzKl z+242Hy|wY@%!B9F)3l8R(#qY5^UX#vb$U=}y6e6!LCx&~?6v4Acb0oqore~riy$S9 zRFgqZlRfpC9NGK@Ah|v+NETdCFTgBx3z0mi?4>cmO66K4(tUTjNX}$VSeOp zs`yN#;fiB!6^>@y?lxLdST|IPYEP>8aAtaPjk@yO{BW?&RMtf1$GqWX9~-92Fh{Y^ z!YpR^O{Ec;h%kcA_&V{RwR&-S-jNo4v3UU79v!$3#IfwSq-s;(w?|eR4_26$_SI)y z==t7yg?)9G&BHU743hyJg{~n--;{8=Fb8mIr85E8W<5o|5tkukf-q`+GiZ=sKwBq1 zZM7paZ`!oA8Cv2ocYNEBo;ES2@C3!ogK_#|C60B1X<9m_)Xlg*F7i zXXW@D`rJ7;`BSmy?PdMdQc(&@MUmP5b6@E3>u1wbS98f>TTURm(N`iTY6`X7Zy|{? zTc0Q>sCuRP`ZjW+I(00^ir*I@`-`;7jZbH$Dj$%wPfJTn9eeN5ci^oNobTX%&!_@2 zzxJ%LNChn(IU|F1fk4uVYij3*{|?dKeH~gWZY#6Ec8>E4py)8>p;C<669h@ie^15H z`#&D6{pUjut1u5(I(HmUfjLV+zU3w~EV>g1!~>oF@|9jVhP8P9*4$I_@j&lSTGhlnBe|VZkj!qm( zlu&~6+f1|}tIS)EMI&uXW{k`t{1Yo=!8j29o}aCW^fNJRD|lX&d^x$c-(Y*qfIc%! znbw>#HJUaO$!>KG=n1r-ia5rVNXx$Pf+B-frbZ%_2gb30n$H4;3)6#~W;d%{I&Oyc zvgK_*S8(L+4(q$(l%i){14q@oMzUT$i<-g@wa?XA6-r8-E!r?DU5b7NoiR`sOE@FH zA913ks>4gn+*UqhSVFT7AD-iqv?4@x8O+Lq@XME<2fN8VUFEu=nji{0=z{q}$fM91 zfmf>!Ea>hhuD?lm(bIVb{Y<^7Zn4aIWwE-t?3Jk(DbBlh!LBD&oT8E5iE<9vaDHPN zR+CL{haeqPzuVrhu=}3Y&DFtEg{0S2oQ*-BdY0FC1B_ z@siiq>*p7>-&l6frZSNHa9XunuB~b1izF+Y#$4O) zzfR^&Ymj&zR?bIq9SRC6GQRi=*}Il1cya0`;p%I)V|&f6Q*1uGZnWzz*KQ>3Roym? z;yVA}TndrgX_E+b*HvFBXz*+mN@YwxDa$+97a!$FcHfF_oVq%K0A_}_qU$yL3n_l% zCWPIOx>hJBME`ctkdCD1}@9&FyxSlZ}{*N$XvmRg!Xf89v+D%`iF`vCaQ|v zjdRu9Oe7nU8O1@yh|jT|Nr~ZfdU`R{HOBM{&ID$S&z*#EXnAlno!MPqrbX|el~?9c z{kIu4Fk&bO^E@YHd*_RKJwMMJ6B8g2v#peCm?xY_^Otu0+u}mJ!US^-oncIWEswI; zF8`r<8$V^}NipPBX=VXcT`_vjmxkwf9hgkbf?4+e7#)~fx_JOMao|&NDmR>Kj=fR;T z>aiKc#f3GWx_=JV+&pQ*6<`QeFTCtqzCPG2FTVf}qse8Q1 z+ppd~b>JJU#?`m?%wCo%A7NV9d+moD`7}^OUA}bYyo1o=^=lXJnl|-%Bl`k}XRDI+ zjUci%KCSflQHwznTOHs|h%L6SLG{e3=tJg87e5~GGE9U1iGO`*sH-jiTWpJk{0>Lu z7Vg1wnw9p7Hyc8&?DsXi4^ypSeD@3ZN%T@sE$}cNt3Do#*%=IS2-Yo^t8)Fadum)w z{PJuuIlL(#cr|>Zn$^7iadNOJa=+Nk&Hl!Xsa{pXyc1Nk>TMCGlB}%PE4nz59`0!! z5=RLIIPFgas>vy+E}S?g`{?nr$O!5_U)!JYOD_II#Yl0Ns{Ol^ZL)O$MMe440?(r< zTV-~%!8Un%8g0IH=iB_k$A|Br50~bk-!H3}cKl649?g_E9cw9m7J4nIw#?Awi zxbcM(k811NAy+nbUjCTwRo#^w11~kbe@lD^A9?x|Z9axEp4VHpq|qgO%fg#E&;C)v z+c6m-w1qo%CLxHPL%zut_171TM$FkP@nsvkkRnWwfVn=*zcTXV%zE;j4-ea{Lm!Bu z?MlAzH-*+?HWs_@5E?+FH=m>LJUf{qoIFd;p#GQV{)XFkO^I1%KiB zxm!uwDv#O4i)iOi7xpDw7K|1oxqTf+AAKf*v#mqcSczx4kL=Q~t3ZA{vkOY?Umt#+ z2W4Hh{u-JwY^#Ca*4s7x1<;&ixAzg`nq6ttT9?P2=>o~e*v0qEAAfA&mkz0{S|rDx zl~_HhZTPdw8(eyH*Y`r%(#EB#r|DN*=fasa!e`ezB-&rkUEx?PzX{EZXK&dwoy97; z37~H8IXLsNH@+AU&N@U-c;s@rb?w)!A?zJz*THqY4}VYR+oE+-wenqqWw%j}YRU#O ztW*{CCKvi3ME|jf+h1;|uj)rxY{bc)Hn1Too*fH&jfHtJ(hdZ4jxinDeAOCtVk?jVTF|^Km(SBXcVJvp2$%@zU>2>!*v| zwY4wbzE6nf-EEo}7@M;4yRlvV!TmDl+v4EI`I%D+@!B}V6{Fw99^wN*Wj!VfYq0&Y z65A8PhoJdj=%_dU z`fx6JjTC=wlavG|_SMhVVoB8;QXP{NsAHJjf$ zCZ#9Kl&g7tax-{vf2dc+-(`!X9{$@@e%iS%_R#5yu|`ros4M5)AB$UMO%2V>+2f_B z4?6GlM|R$H-}@S~=gU}EgpHg{~Cm=oK!ZJRr`ZDVKa-1ED){#EzkR^9c` zy}G-;u73PhbxZ2o3UW#4F=*&HlN!>%aU%Zn@BbeTi+o?Pg2iDjQL12ySYp#P31MPT zQN@|UOJeEe6w=TXAj~XfWfAH%^tiB^MX0JNsm(nMhlTpTuTd)S)2)t z>3@0EG6d}Yr}tUC99-BII!A7q>1O2!7?L^6&Rx?Y0v%}>xR4}7q!4@hw2}&09b^e3 z87SCJEemG7iJN<%2od%X1_sIM@h2r^TL5`rX85;(|26#&#F77x|NpjPQJsrLMWqk* zi!eprP#9zxzl)AN#XT5Az^f>1nkn2OD43=I4i*L|E+fHX3kOCju^lN;yGrwm94CX9 zEi*^1z6Ohm|DWEA@Zv}Xr7KF6G1sQdE!H_qydt5fMWOf%smiF~8Z%BHo}|~W65MjO zl*OgWFg%N3*rNaQ^nd>Uj_gh~Q)2y?iz{%87XRPH|8X9k{|vnN|0oO$s;IhVSrKZq zT?M|9GS5;)JQoNxIRtD@W@&3{2DT)DL5(Rz7{X_;w_K#GELt@Oo7=)Oz$huBG3nW2G8Y2a`$s!eFuTnC~VA&)xGCOEI>U9B17WEaGC6DDq^x~o) z!ir2h26b{fQl$7&Dg#5J#;UC?mpSeH|%KNKbG!m9g@pQgJOKu{>dm4MwEl?W@H{i9zA@hR<>m7OvpvI)jR1I_R}`R zf&`WKx3jBziZe-RONlYXF}m1fQNAn(#?gXtc`6b6N62f1b(E@>`B$R9-+7*%n{+Ae zDeAE~3w(SO1Kam_*S~u098EvO$_1W1HoeoChaiZK7e0{4krtdOQl1)E3_lbazihrr z!s5c2wwP%kw6-(MEn?z~BD;yRECPrmQN|9naiQ|?sXKc+Ewso>RH6r7bCS4kRu2e= zSUL`vNqA{dW%TF>B;sa-vC}FYOl6YR^ctvU_jrvZ%4M_&$Om{S$s$ykrfy2|4tIJg z5YafP%5(=2VVqaS3iY8wY$WectxJj{m54+aqPKy9$__=?n4@zK!8!T;5oN6BK0U|K zp~r*(+VdAkVB?_S&yP6y8kiJF7Vr!rZ13uzd4r*-MQVeZ*UZ6X8%_FlO*&U@*<1o? z1@{{~UFC14Rn3LdU1>%&Q16N#-5}^W7Rs(>Bjbi%gGez6w-qe;vhuz)sv!`8UVJGV zsqYh~)B8FGKIZ&IqtR>*`sqU4#J-IGh;5KVxMGQpmyDssZ3q zN|{j9%S1Z9@lR1PE&{9|!{5{J$)5$$mQXU?6)BYwk_3*b^WD@m$a{rw@?r>%rV-Lf ztsuFTx~;nz2AOvHRck`;R^*Xa2XFIwdyZgoGN$a@o5QvTT`ZoM<4S>+CN~+9Rgjmx z<&qjOUiW7E2iS}D6jyY5OM*&pw>sFCpW4ga-qDs~%X5(VFodydtb9pxcR8P>41oJI zRZ(d!z`aHrrr)&4I-k;mi-Ga&VHOu^yk}nPwgw{;No&r z=ub>RB)=oT^7FEP1sF+3xcyq?P3|fmim^aAQ?6*8kO})esHSRgcj|d-xcpSL`mO3k z5)EUqf|XQOzA2{JD+Z&wRyR zIEyo*^-#t2Y-W@{ij2%Naud3&cT|k9q+MT&NKhLOTekxAli;VF=Msauoxj-6dn_Xv zin$C)st#u4a-inn^ae{(iIU6 zW!^cSxn&fMjn`-|wnRf!?d<#Ytw%N|f-7?g(^A1OZ0!pa8qwoLeq{bdc;CUiNf7m5 z8t%Eydwj|%R*cRvwgInt7LkxWKHJ2;zcvz3j&Or)W=ZKdd=QC3KDwhw)@T>F|LDI= z_{^b&LVniXDfUB6xDkDVqB5c=^TJafH!7<7t_3!lAw;u1M|0|67z3=Qv#)0&=MM}s zm`A$97wpv?vM)l5?H9DO#_Q{9pn?D1X4*Z%qMhOT+8Mo%Ti&^&C=;1y;RO+y{d9T3 zsgCS_AZ%&X+Q*NBOz~}hSJZHX0e4p)s|fc;Xx2M3xrb z=Tbb0WJ3RNas3O{&0VBY^X>Li`v*%Z!TBGuN===0X~Tm3no2iz10K5kH9~PYyi$uu zC66-ezs?pNxyv5Vb2+~G<$KBRL-(^<8hIO6zN4>84M__F9~qvTr2)oqPC?m~dSI8( zbA3(TojfP7qUNm5+}z)?rh1WU9+wFrmr;;rl9{NhjismeRJwPo?Bts*D$M&L9So)^q=~iyyjz2^9l-obVMt2j$ zzc;#W|CST3h-0wiB={a1%$2<*XH}L2@(CHEM!r?e+lI45?Yir{O~G0xSCEl&6Yrab zpro#bIF`9zz!Q|E;$ks9drs;;F<_LIfjJgNWdpkGNS_7L`nabDR`{H-hdd?=yh9oy zWu$y~K@z$BX%SPo3COS~15rhFatP#pWzLA;3nSzA+CZWNeov&j`6!!G^3k{(GGkWG zEEs+~wMv9IG(9Xwihmaf#8Ap&(oSuK(#Dw@kyaYO;I z^F@KpX3v;QZSQ!~)xI0cP^-fawZAz# zi?G%>dBm>hiclKZ*|R<++!{OacLhG3FJv#z()?`kKkik^O6JRm(Ui`=e!>`T({v(8 z28K$zY>R%6W65Hj(U8-t*|Mq_FyC6I=vbugjSYVH>z^w>r`F~TR8}08TYNG~eOh7f zhzrV&`I_x*nT2T;wcI*AP3{(%ZAE|*pg=&FiUg|Dz+ytq7YC#2y!=Am0Tjc>x_)IFWeq)n&YawW*XNml&y*ta+7o=@fOxD3*w0VS&R@JRS77UW zoYM+Qb=HZI$LNc`rQn!^6DJxW7F34#09Vrq1WrlB zfO*6kZJcNAj@mqFip>I=vuO+|zay@Qn0T6yuf1-gypE1CWB#oq%?u7?$;Df&WINm5 zACo?{b}ha78ka{>#fzjJ4UhhyYV)4dNUXcuG2AdqA9hTYQhFn;+@iZWsz%`8EUKB6 zP!)Za5_)za1P3oj~PA(*a#Tb zcyJzaLn#4juHIDIzmq5nkb{TiU7M`)M7eLRs9P_6vd#2Q(@48fUi_Ef&l84~y9O<= z=8qg|bqK(rTs5+ilkvhN#)049Lk*XI9-e)Ls;DT7mWT!>&y)`X+9eZju1Pd9*>i{P z9>9WwW_}qwtRX`2%|R-7fNt^OQzX+01R%fuGsg8@LLG)J7*ZeuHDW>U0SH}9YZ%sq za!AX$@llhhMeo;yhtQd87}q0kC}e@{6n4)#tyD0XCZ)tpa9$bMmh2=wz@uCOPw0zW4gCI7UgPyd$ukbNBcI$PiDP|1aY}Wn_#Gz%P&MH^2}D{#u8m3yH43Nh znSAFT|5;|FdF~E+;Wv0LT-ED_w=>(lw)Y#N4?QUa!O*L${&W(4YbwG*K|&|9-`_Dr z)%S}1kge(n>=d56l?C81AFt=Cj{M z&(Mn*^>bQt+g4$lro*O$@am59JZ4~DNfmgAqvBSWn5X7t>wkkRpMnlAX;Q@+-R_}cI;U9RAgFS?0(0$ zIsNx|wpCkAj3CStMe*ZB2iiIrCkws_=Og&IUA?4Q>4LhV5+FccQ)pH}!AD>&BhI}u zy<@;sR0|~et@h~2uA5vL+RoY`GZvD@uHI{~D*Ze|KSE8yvcLwS|b8QLU@5k2{1d^R_ zZxEf$qu(@+T&H)nPikpeDbM9{C*6^m&3DHi?#9|?@!aL6wC0;$=4Qg;v?LD>IAACJ zjxerXi2oF7*XsptxlUeWv|5%Wqwu!f6Q)SE<^W<&R!dE3`*8^q{XS!xpb1gw^)qNS zsP}RNP-wR6t0?&7A&64ZRPa%3!H1#lmXn5DG^b2*Nlh99ibL4Laj5$smr!&@G`pEa z;yc>kp29nT<-KknTmFeI|Lvr|h$ti%@L(jxMnO5$0x+5@mSmTh8mI?=qRy9Ck&}+EuigeNW8+^#^ z$CE{$oV5gOh)R|EyAs1P8f|@xLbN6~Ihq+h&mQ-)@pIG7Q}PUhqw=WT` zU;Wv)5^vP+ni4ZZj*wvC96SaB{L!jJKB0#_?!k0E7cu=ll0dhZFk7iZqitXP#v8}p zlx4@9QdEExaHsGaH-a|KOTqdzWI`#tB6;@kx7wM_3Uw54r`pGu{ADS zgLu)`rO5ffhL-l|H$~-M5ZXqa4|Gy=yUa@Re#?#XZ5>ZN z?l~6l&|Mhk?`Q!z@BBkl41pc`>pB%+->T{w6-bUv_~V!WKmCduyMUTn5{5ld#n2|6 zji*jBjk-@C+mO0Zc*%?uC01W#yHW$+g-t~3Vr@WRj&>0n;qm4kAmmKb489VX!tN4Qw#w13L% zW+x%I#d{4TdEMi`G)u8@qs{D1hbC5N1#_GF(NnUBIuk$*)U;4y;-^{ivHG|L5msiz zS1Ipq)_T1O^Z%;+boO!5mBK}ojFhCnms^Ixx=1!smE5bO3-f0Rrj_s2D!~scMj>pf zRN#(Y_={hFG1)aA%WB~P3s2(C&6FTa=%PCauD?mz62Gh(;oo0ER(iQ2VvsWKOFd2a zR<75-ghBsDWq~_a>lV$XjR?!$b`(4sDVaE;jRV{?5}L30b0@+I6U>y8J>+KIN`Ukx zSZ?*QNmUosJq)kiPSlch1E?Lxp&i6T@g0voUffm?Fyv*M3`1I6N^c)By6Hht9_bNPulg9xzb zPv7@&EGWW>aa?a0I(#G$2B^pii0`~FH!tdI4p670+p5s&;(k`+V*JhqnN6aGgNpm3 zj~;!HGqm^C-){VM7Gx$Psy1){V(*vqN@1&^G6hVAp(qGh#=ISSNuQd_{RTo5{H~`+ z%T*StV|4OHS=gkh_qzf?dQQS6K}e8-sGtNazGu0Ni>Q@UbTXo=avp#7?_UB-wVpB@ z1c>PdX&uPgT8SLeG;J%N9J@3QQ)Kn5z?0<{K8njLS+pdiN>gs?L|*2wJAQK#`NAtq zts+uL+^xcw=)`@o^ZJZ<5%Dv~BcgF)bmTlkg0}&wGr-q?hL#EG?_)FvuEa!l7Tpf_|8^2@A~M&8MLrnMUQemc5GBI z6)Glh@FqdZ(l7m@I^|v@v!$gU)ibuFz};w%3C_!HkMV?3`S9pgQ!pl5apj%-fEUaZ z1t3Rw#s*`Dl7rm%=c95{MVILxR~x`f=CE`_60JDL3nX-KS)Lhw9=dO}TT>LqlG)=h zBh7_X!3`xNfnKB;ia*BrGbUtI0aL%}FLiy=(=UP5O}lYzD}Dcw-0rIm-u~>AJ)aq_-S6AJLOW^qVugARcyyZY` zUMg~#F`^?_$Tx1x`#9m`lIq~d9&QMUiVn-MVxd|99Uy9VC8l4f>M+iUO zQqA!TjLvpV)>;C}l!_W{^^6@^^WU`iFXt=m^8h#7Q+~P|q~DXyQmX5n{2ze2Kl!w| zoE%RMl3lq`RfG!`@DgCoJmMELTi~nM#EfgnSC&K;lfXksk`b=aYPIc@*xsg**fvVG zj!n_F{|RRW{dXPIC?oz$4GK9y_Gu*#?QiWC(-0aVg96toDa-3?@hGwd32g>7(4wkR zuaP~Xi4v12tpGJq%a7)_%Nh8PQ9YD-!7e%q0X%WMWb>3;CjGxVpF$rxP@e~quObqzh6>dnzwh5ypl32Brrjf{8j@lLVF ztl30f0ofjLRE%GB<{In^C!!tzZ*WjHolZ!@N!vn2Ppilncu`@eTNmCXi1YK^W8asT zp%xsA3LQ!~{3XC;NBijuFZ4fepr-NUs*^<8RUleOHUxaPY@?O>W=pzAa zjbWi{#a7IZ7J*~&t@uNwJVcin;sKV&+GBh4%*gd_Tm$u<-*%zxF#}!wDy&@jin}h| zM!r)6?_fmZ)uidr1cca#kWkbRMCf_yC}Ijf!kNH1;q#5EDNd*fe4t0x@a>uAhHrHl zIW9#IK38?zTGsU@uPSi%IV!Eh(Ypmtt>_J3^2VQgT}Q@7t2Pyz)gQ8k)`L$L=b?W^ z?6yJH7P$Oj(~;q^^7;L%Mt&%wB~WHump_{({Lq@@GBxJ_kN$9CQ0gy z>3r5Xw!DM1SW^8%?1RvLmkkMdyh4 zyP37(8ijw!F~K}$<58ZLl6w&+<~1Wh7XfBOedqW|n}@+#_iYcpT|fjBZ%*6&mNKNk zD4(Xq2aqy_nIk8P4Nm-dXn{`bS`(?7-?r&jhrj&;%07Rx&GKe(T|5 zO{n#|6c(vBk{_~(k6w}I7&kKkv4v1SzsWTSP4xE?l__>wtTrVt&%w3oo7WLcfNFCi zOtCwqP!foqDm_Gq$4DVo^o!k-mV;4rj~Sf zcU!DajGIg6kyHWFI%8x0;(9H6)aQ&Qk29m&ui zRp19ABC0lQt2CPD#_UBLy!PAk?=nTVa1yBGh>@wM(+_Lch)02|XlL_Vx7+wlQ@U5+ z9h(YW0E0cQewM%_-_zN&K2FRK($VWz{S>lD6(^LilmUp-237Kj6*dXno)1c~{3VhL zCjx_8pDDlOE3cH$Ydk+P(rksMCxRq3)*xrds#SD^>$n6ISJhfD*Ac>(4f_>m1WJGzx+GO9WQmHHhEy_^7*kORdSGqr#m` zluaCw@)MAw5(Z$7jVxuLOi#1QP%W`ERYAaq{}C_xIZs9Er=TNn3Ob=UX(I#mNoJT) zQwP^16GRjle;;vi+75c$9TFv95#XF}D@;BD#~CGvt|Sjh!-3zOT`X;*)ZK*&*sI~F;iDR)_NR2vNdFaK$6q3B#{TS zF}UY8397U`o)v)Pv^5!h{%~S9q>>1PV0|_qIy`v~F zHkN-Ynii%Q#P{%s8ng05*ZdoAM?Q_pBH`vZIqw&x2HS2uOIJu(ZJNG`&(=kLd$#Tj zK7NOjL!Vkts}~em@ONBkn9mr>6~8zdU^UPpqN!_<_1(Jg-D-|tRE!~`&7u>Jp1F{G zjL}CDK^?P}2n)e(ZKY^WG4{(hY8lUlAlTIwPc`CW%>U)hr;79V0T5_rcHwE*Dn;Ip z=Xv_5c*eqycXd+P25O3E+nx;6xW7%^*P3AA+NhljJo9RGqPrh8QqCW$5bl*2}?qO~~-J0r$$` zRoZwW>ydy(tG6&>Ne~FNhr+g_-oGE5A zq@l}mTf(=RHBEVJ_LVF6A#YQ<3(s&ki!hu-_*-!3$ElIb`-zcpN_7b2ysW`3u36q< zBS1|qTvCgBM=)OP2J$uF*iixHk4$}R37RdWsHLfBaLNq2KK)>>qkfr&hg_;DH)Gr( z%wd}FNI@b%4KWwZ85(i&O=`+oIZZOXW}vsbGmy6 z?2j6`YMC{BLNjcY%}rQ_-H~< z0}@+Wf^=7oeBIAIvxrWGtWwx}{#mtoQpHTvt~C?xLo;>tf~IbGtO(W*@C%;FWFy2R zG6EWn;yjrV?ItFF$VKXMd!MKOTo((D3)Vv0#(j|pOuNimDr9*6$}BGA8_S}yi{(}g zK=R3)Cj>wuZwm((>rsY;(OgwZSoi78$Acl?wLi4Hicmm%rkjHFJbTZ%<$3S7kKDGh$o#mz zw_qrF5HfL1WTUNpG?P-@nwM+)h3~f<+vijZl|~C$#p6j*e{DW*nowNg&I-y(8_(v1cPIWm8z@M?b z7-&Nck<~wFWNb*KZY~ob+14@qLgH*iC`?9anVR`ynQ?7fQqKIY|rD>0hB zKY3b@c&iqRrCmQQ#7Pc+T)3j3Mfc=Zx{@YfGg8`gb|tPoJ6`3BU;s_Ea8?^t^bsHC zl=Z6!1uYfjdlH(C5G4#Z&*Ad-?A4nW}Mt2*H4wDJE>(dc5fHcBvtAfAqrs-Lg_=hnNYs~Q65E0O& ze-tAS987Pn!Er?DYg9-BlXjGph^f+)E4jOI_uxW9ee$oX75x`EB6Hh zPU#57E=LcYeBKmvsw413>B?`6hdi2_!-wM|)`b4-m?|Qpr}Y+RJcWUS^X+0prtT7NBW$}i(lA{U-G8*)S_3Pn%hFiuCtQJsoX@?~g7bAu3&Sm{Kg zH8cfOJ!f|ORE}uXk;bfXfX*bzNU(%jNkexf5lqQl1Y%(|NJr>4v=)bLH1#Y`nW%#s z(8C&`D8go;pe`+~kxq=ly5j*)CLGPT0av7|tSl2Fr6j@rjGhk3I5hD)<8|a4aSl}Y zZG4*f2d%(uRshDWaP4Emm6gJe`$l~g!1-c6psLPlRo5pS}=3PtsG)2?5x;TckYVyzJ&FIl1DH%=Kb)sl=&w<;jHnNdF0iXlY&MN@^r z`*Bwze_*h>Iy4pXU)FePzxXOo5GWNi3rsi*K;;lAZeb8Yn;l9J=&=~4@3ul|Aelte zOMrXKY_)SY_;qrZO()Z(6-Xl|5rKkLkS~mz1@BI^TPhi4~&;oqU^8QuT z=!Vicm;<`rw9;9y>E)4>;$Ziy8$rAxW#SI-Os*|{ zVY2TnF9E0P*^upT!{SZsAc zU6$edd!2IHyrpP+#xks_CRN1x&V`{~{Yl!6;$YdOIKM)!5!WWP`R=WNeGL?mYZt+k za8nG`30^01O^z0JBbYqkCPxW)YFESu_9qo?tMgVtO*}e1xyow@VMsOi>@cG zcX2^h@!`Ab80yOy^$+T4pNzUtcw*dX!p%^GY-+PFT4F5v_}NPSg0c|P8CW~U-mqVr z!_orkHh5319bb^$WQNDAaenGP-AhMoZ*chDVCD8%wJD>H+Zb5p9N{Yv4=cPRWX6VM zpIuMZR*J$INyR)?_0+EejY1L)_d5d zBVlHpuLhsc|bY@KS1*oH03joje7YwOJih z&W=W%xa#**oScY-F-;xvjkT)h0Fsu8WME93 zoB3G%8?T9fm44U91VtbpApR}wq2lvOPpsCN1 zC7?vfW(axYb3H`zT1&pIib9KuB|7A(zL08@WMj4&{ngWl6F0tcYXK~9P{1axKm~>k zFd6K?ckh;@;i6ng>!Rk{$4>0Mlm_#MXpGg;F#Ru5unJ&EAnn5Q-EGaeaeh~e@3mn| zy6|7ST1P&Yk2q$=c@obksj=C(-%`^s?B=MN;Zw5UZEvHtWT^H>C+jb3OZo%rYO=Jc zgMoqAc$5x!-0-v!*=@gg|FUD;YiD36vJRS|Y3x68hon>YJ2brkC~J-(u%y^G_DP6a zMM%~?V!M=#wK<@I7j<6pHW{%q?1u08PEpZ6WVq#HB0n=040nWVz$*i7QJMlHoZ_E5 zt*l$U9l8G315Ip>c2XVGHRkb=v+PKJ`AIWEU`{#I0$7^I^Ta0@=O&v9Cn7FxG9VIb z;Keq(Hd99X!&Y1hp*AzV$XlMY53#=#7sX!RS8+ICf@?2 zB3eA`qJ?U^#v02y23`QtMTkmLxa#`3V$2rI+_#sXzjM*tB8nyt9bd|?@1ET_NIYfG zlO9z{IYRKhpr25{k!8*`?4K9a-*5ERy#B?jq^k(XehSdgD32ng?rs!|U5XHTNW6l@_mw zClF%cZs9#}PNC%*=A*v}m zdwEeS*^~KJzp;OFhEs7Sy2zEGh!2+$8a^Rb=VKyvQOkm(|jppNC9X zXltEvrGMvy3zx5laUVsYqtIrH>GfR~=JeY7!@(Ob3hiixC53Vm(?!PZR*}gxD{dCk z^H3{Z-uUuf@<2V{vjM5M&40Xp7VA%>*c?XQ*0M#Y6}++rha2nIKRXUcgx7qJXR zG4gOJHrI|@n`A$|SOM{N8WpRh6qTQ=YEPo@5Kj6_Wh=hb>qJF$O;dJ9_L7j)LxnuY%v9Y|(EvjS0tK`I;3bUccG-$mCgi z2W|H%`q>z~;Pff#%z5?jRcR2%Uq~3Jkb~qqVvt4T3(7o2=nEc=bV=yvwaZGZl2NQ7 zNU`F#K*mkp2y1BBIKLDis;Gx>f1)fbIx+`x%F@UzlgJ^ZJ;R10Pu3$-FLA9E3mff@ z!y&BUF`mQ~R|w^GSt!vprdE}$p5g+M5p^S_#NI7P)Svaj%$=0v!rTHwOW=1+^rk9t zS}XLO)g=KF$1B@n26CgqvkpHlBNTp2{5tp9QASV3NDQ%`_5aW!T+>~rRXycK38bRI zZ^%(T=M9$H6-4`RUZ9eH5`1qy*ZnmOo8h{-JO;+t1K*jZ3CI8DV$a2sJ{A^ zsEyFO{ovuFbo>}S*)j}#3Y-Grp2^Q;@WwQlb+7-SperZljND^L`?htzX&R{0X-v`h zPj|8@OcwQ63=)d2ve)JBtRkF=pGlJn0_JiK`uR=P zjmg$#xdec()WR5%35M?a@MBlXj=Omr<2T}wQVZ^@GW5Ueq4&H2MD$)piy$&1QIt!4(%s>FA>_}9Fwy|5^K zmtnpLZhfBVkjj4Hd4gEOnVZpNqi2H?7*pW(Mu8-SQz&tZ2&Slfxd5QYSPJ@Me%_*K znE0?+>@3}oe*8yQG)~_PuF#C?k0TN$|NkUYzRyje%MMNRFFxT3LETreoG<2tpj0#Y zVK*pt$`ascl0+%%l%C@VN5dIm{CS`GM6Av%Nn(6Qk@*GVDl>7Mhd9gY zGK-IVI&S^L#)Hqng3Bw%He8lZf7|A=v}ovtLWMd`^yP~y2%!Q;2!k=)v1KtN5>MPo zH$}xg*{$55BO-cvsXQ=-13+RTN&!-&ZZiR8AgoOe~kOi`E$>qz=Wd>eVn_Oc`4}B$20Ob1w9Jz;>D08U89xvEpyPro2x@+g_xu|HX@# z4vra~bztxz!Yw}G1RM$mI6IhF8$A}+Rs8%4;Bi)>>9F13y4Q%?ZZAH0ARaY<-8Jf> z`SrU``QHSL)ZJ^LW#_Qqx+zv9S+3O6E@B0P{gGJ zB;!Ws(LnApg0z(cMg?R*{bb~_iNiGc6`OC~9~BW*^6v@xc?Q{=7uL5YOg&II`iTz) zWip=gmL92BBay@vn6I9Fkvl{NUaL*Zq5&!ZVXm?zjFZPKjO?1pyh1kLzv|gwH4r_Vap8Nt;UA|E z!uJCiXTaxkTvF%@;@+Sr#-!o=3e(gXm;gUn8S)>_hC`P8IydMN0Sf2+H>`)>_0wdT zQa@uamCv)-gaUqV0ZM&s*?HNLV1A!v_#I%c^SfR8J1Mmka*bzT<>|TzROF=@QeXcW zmO&`*t4KCAM02fzYowwghQRzvaDAqdP=JCKHm4)p;JQtiA{y)GSSo@ zQK3(K;d7H!--P#H;RZ32A=vdV^3jk^@W-+MOJyXgw^#BW1w*2b=|jRM{s9)dy${Y> z#%gZyd?@U^$inj4+~OvCGfL=?_*@X0%rtQiFH_Vum&Q4lzec6YmjX*56OeFyKc4ej z4dH* z=<|gL4Tf*x$>rdQShDhtlx5{`>18sPVL({KbN5o^$q(K%l(Ahe>wf_1}`C z149?8E2Po=-2A71pHAXwk8m_3_%#1e?M|_M7Ap)!t-KDD_&#tF{l%L6p;KV6llKeJ zKzz1lX&XTV;R}-@>fev)pGK7RN4G1nKyHJlQl391vwu_sgH+mncKqFdS%H52$K=b4 z&6SfC;mgfWQT+V-z;#-j*EIJQ?8A4)^q$`WsM{9zuL!h+(QjaY{A_Ag^c1qusXUNkdHXz93u z%S{?DVm1?18PhTj-3~bsUwoY%UeTfhQJB8JU%iQFW6Wcxw;LEO1{S_s4-TeI z56*RVPm)WJou_0a9cZg2a*B~UZ#Y1Bk*43b&CeU}bbi*qmr8#DoW>a^_LlE5S5~W= zHI*HPU@G*aGns>vY5FKokSSSSSZP4obamw;(D`t&VjX%Z7*S^iU{wYa64;5P;#xRX zrQvGQXxICjtf1T7Y^MsNU_42<5LdEn^kekn(=Ms9!u~A|53G9fDFXj;qd`M?|(i~RE=~uoGx=CdD1SlOe0Dsf=L1(ElbAIBP zoY;mTkY{W^rnaoZuOS<%6;U<2im-m;D`ctQuwaumV^^NKaqfI&RcNV)0p&5Glp0tGZ71$2!scsJ(0i1OR+#Hh!M}n z5>hFmg#|8{cb`(PK~eD7q*dZF#$~9pq8QyS_$F#qk#Tivc{KH?7c<$p zRuFK)RH=-3<((p>b%iZ*W9Ee}1?LexsyAqvC#8LzMeoObjP-l-?r&q^8rC>u+>9#e%RB|C4*{H5?a9hn=enWNk3D@?csZ$iUdc=g zcOwugMkAIV_X^V(WoYqAJU=`^6=xpzJy<9>M;0gw8ohH=wHf#nj2Yam9r35U&K
62wXtwL9;ibwnxigWPJ&Zui_JktdEJAsy#%3v?cCrGZ(GfBHD;8429*h1~-{HXT37!m92fz$4QH2rM)Fqmu~TzOXn zHh?K*A^GUz`W4uAfNBFx4mTYUs|}2~HCK73MZlKz3|1+ovYF+1FfHI-I$~@He_3lw zb)@OiB~Q6?ZXV@vhLzX;j4TzsVzT29c7Q13jiD+L>{=jY_!?T66w6@sTq`q3_4eNv zv>wrrhT#e#>PAC8oPd7jW^y=0Uk8^^2%yq}uzLEQ@FDcZ=@3F+N^R{a1q3<_|Jvz= zMrO}l2s-1@2)HWxp3GaQKlgwAv0L9V!WwNDarE1Hn8j$U3y!mnt8&0{Z?^4^$5`iw` z%-wdX9*FU>KF-mZI%Tmy2Bso?TSHBpfZ$fmSihuBt@HOAeF=s$2id^&dh=FCcf-wh zd3R_20we$06e9|bD@JdbG2624-frOqCTjoL*~0xDXNXef*!~CPLLQYBl+EUp&E4zX ztKGMsoikCF7OGu>JAeLbp`-S1h;8NchPAqkw$`545bF6+Z#UffY-jS zHoe>7PrSp-ZVk)8AH-1q{a1y#7>Q8fJzqNiZgkLeEWK|C5LqKj*3d3i4kpOl8+0k@ z626aFwymt-vg7n(uc5j)*3~{6kVs2>aH~lyIGZ_7sm2owNjgU-i_XSkkv6Sa%Oc1t ziU&k+Zxf{D@e0rtvz5K|_*3FACLxcBbl^p!D=z=+K$xJWqNPYgzsLl8ysBhf!D3U& z^B2^~uI%F%*$5s`MXq(k*FMHn(8(##DW#s?=c3|FlOp#y9YPhGx83E z@eYWzs4ezf)Ve%bYrGHzqg}MVZh{~w{i@Ehj*@b^`<=YKllJHNc@q>&Knl>?Ot_ZJ zL&g8e)lvnDA0nt0?j)tk9#tjMRFChoFYC3MdJKIs-^XJuf92+Aj!cbdR9C^F}zmp)Ktd$|7ze&{F(ma zIPNS*3YpkC+{qQP$x$JjqsZLHNTV9F(MEo{$d$(A963rGS&n70F^h8Mn#?tsDPI;YWQ;ban&?H3?NyldI)YS!S*oa!JtY=<4dOtPIn13IJlP3+ZS65YL(g3kjz8 z^QK+<3sMUdL#+@(0Y9V4u02(z_2{6&+^eh`CtUprL{@v01XsJcVEw-c!8l?ep3D+V zO&GLCi&wrQf-S?IcC0o6Y=*0rLIp`3ooCTNQ!?bP7aBE1;F)36TV8V3QpsduXV6uF z?D%|AnBR#0XgTod_k^@zgis#ErjZF3#48Zt6@1cor+0^#QWR3HZC6<03Fw~F&0KZ? z58p4VPvUr&4}pRt3nI@yl1m5r?y61zQmM1q(CV49pA&`_ASSR6Jj92X_Z1ft1{(T( zmz<;bu`987+>RB^&~;65+3T{kv(ga*Q-YVb5EHZl@y^@44ybUHBM!x?G(eLALO-Hw zJNOmLkF8bXV8hOIkZa@gP0Q!zPX|&N{-3gxGe(?Vj(inY?-c*YgIJKM^)Ic9w*o%1 z^$0rxHe+EoKizroSMdb{>JMDxK~uZLMA`Z8Rd=HfzK9kUk$aQ7TnpJbx2r}1%tjy= zZF9Ue&*ZmWr2&TTC0V_u9+CScLR~JER7bDmuuGQwR73<%zPYN4@7X!tY zf9|mgV|@hO!?lNr>*YTt5Z$C}^kf$EkabOZ?~67@niP+pMR3E7ObzsA#*wFyI-TdV z()-NN8Ghq5o(*ZP{q{b7LB83CkV5ARJs9gtpJf5f4@Kgb28*5YTfb}8uW%A+2(Cd)>oIz8vgSa zS5Uw&H(2z=oyt%P(Hc~>=b0d%&YGi3F0ZngJth)={A|vvoODTt%auEQGz4}34j?og zQ#L!Fesj#0$18ijryKCj^j!#Y4^y+0C>OcSRnyQb->&xFk+&^lFtrW-%fq!}`%N^3 z^XB4vN%r7Vt7qkfd${1Rym(Bw?5S&}*Y^z2!|o;N&nLyiZL$v@PlCOUd>!~Uws#c5 z97wO~xsDC-d6v1;RSkElJ9OicjzW|>SeMk)ZQiA`u6Hc6((-%poO7?IYavUor06@E z^G{!6x-;g@Pg@Zr>j%2SjNDJzwh&-EO1@a{O_Fm|){)SW`>JnP(gq5DT3Eg)Ch+oy z->tO^>4f=^yeKd>1n;klTz5O|mixlnjT0%BA5^)Uuk*wT(dS$Q)d#IJC!rxCS4Uu4 z4C;YlZu!S&PM< z%ojl!jhK%Ln9i0%1p%`jva+Be6!D1*)dTt``Jp>N_MDt5;GA@umP7W)Q8nOrAid$( zy_zpW60Y%K;d^Dsk5#wZY||t?82f8-{{O|8CW3|GG3ctwE>p`KM=f zm)BM+ivoX@ToJtQyGX?ffYoJ9;ap3N9>a6~;k^U@C7!@kRaG5mQ9A-y298!@ z`p@(A3S`QqB>nWwJ^H)#g1x4M`4aZ@ZR2{;hsx66%_CmX#z0~;SS7g0lKjjA^UQM! zM4IbE&CEK}QHn4+vhKpPF_dYXJoQ=pl$iR2hnwm2T#Y~-@sRW|3MiF98Oi_NV`%Xr zH|~3=k!sDj$Rl5oTKv8@MODlGWQyAfyylyUy!wi(w2*~sXDlAL-`@xWOpOoD-IiV6 zuh(Z6IDUEGOZ;!m-L$Lig=tlZ?x*A{yp_43_b5gEwlep-UrL>tx^?SLVHbbubZ0Hq zX&(*rF2B5gb8imO1DHj8Tlsc=GF_uL)e_JdcJ4#}rb$)e-Mqn2h1W3B;)W_vdT&^| zbce-@jx}sp(niPWGqHPiYm^Pg)DzCcE_^$y>(77pZ>!prh`vdY1*Vq$^zCv^x-b{x@@AJr6uv&B$xp9*`DBg#s$0s6A{yVl}LC>mfd?RDbX=0Er?QG_e!+t!i zl91-Ua4sI{(c#c*R=e0jkbKPm`@EIytuDN`@qgGQNJIlE=WPXsV3?XlhQvP zg_Fl3Z?kikG!q{qe9}r2o#mu9-UOIx#5tYf_3JmveNYL1ekrKo`AJvY9c6zafezIU zoYtb-I4+}qrf3@4Ryu8$$9dOlJ=jr{3?ZQ&+~K=&h>W-Y485^D#*5fOqz85g zHrx5%R=Ck%Cd4q`#20Fym7@}a8303RxK{Alrv4URXbU_gtDoD#v0s50qlba@)g4p4 zbZAjE+4@65RxcD#-Or$?K_B{JR0vke{+1g2kPZLYKV@_x)^Av2j!yy;SqZwD<-nZN z@X>yldY#dq8hyClNRh$eKcKd(lsujI)ZUxJ8s~iF;%WJ{Jqe}qcC7l1cE+JpS4@M~ zWTq!9$nnr-_hPJ#wrarI@R*M>El&?ePdI%yMtydRzJ|B`u6zt3$8~F@Cx_$ z+kZKJNPbeHdZ&t9mJ!tNUGBO4%*mRDbG{j}T1E0+!n{Bq$n&?Sadk6c4XK=9>h$Qk zwnC+A-mX1u+7h3pwe2I2dAAgmvy~4vm5sa{m(E#sn#Rp8Hd> z+1vHVz2(WT*fTD7|89&<9;>&IF5UgBIK1=fZ*cM#XLflr>hIDg4b(}XMS*lr@--0P zD&g70s-g8;0QyHu&9-dgV`Xu!?t~9J5AUuyab@6$rc-*Bl)M#N>xJ*yFAd}6uNSue zUiJliU7H9!_#gAx`<&em=ARGc8B?uJ4<&mVv6ptvS`dzy_^z#Y z+dfeDr+!x5SF-%y;GsiWgfB-~7G23w5Qy+cUflcf8}2WD@2oqVsCePA;Me1UXM>C6 zzsWZ*^Lh$yT&_F;wmB&Or8Rf>b$90Pb0gpTX(sLeB<)HbTdu!es(nv*338?nq@S?4EQ+VR`t%Qjntk2FVNGC)az&J%QsKRbdvX+uupcs@Be)#kmMx&brb*f^4Iu% zH)z`%v&=1t?vyi*p}cR`zJHDG$n4y2;ACm=>HUEEn^5b{Oxs38#*^Jo1CZ0LLX#N- z?Xi=e@9xU0>d4u-zro!~0RSA>z4*0q^{){-iU5~-RG(|5hXB{(yY{X_v>3=tV z+8mK4WF0?8s6v-m`?a5&KbuzC<<+}AFWsl;+5R~Em)c`)+pY8delFH4NVeG?+Z^KQ z5T~8F`or<|)}z!1w|wuf|IGM{`Z^ek^rocVVn7e?yXSP4V zyQKffPZSov{7mSSc7~A-6#hhPcJ9grjXo z1v7In31PPDTdq=>kzDf3RXOEjsQE9dUI586igydhDQ&ioq>Ptl<4(x#IZN`i1W14H z-&9i(X~FGtD$@PNx?{h2tGOsi&v#eFn@ftWX&OEKeS&gb8)|E4FPO`hc>Cb1F#>>ef=d~t19$A{wwPKcNRPj zd?Q@>+v#JhCvEmhxeEnxV-Q?jvn&d|vv>}dHjQ!{kD*O3rq~T4{ z7rm;3q3;v%jt>8VXhUdD($(cEi6||Z9|r;rJSP@kmts}FS!9uUTgQh>h= z{)SV-kNjDjQl4Lk%+kKgje1DfwR`2i3AOExON$~McEhvDGNWy+9H zH|YL}2ufL%y2fE(bQQ?GWF5NamC*iB)!tmBJ9Yo|`UrF1xViamq#|h`&S3evsoIpx z=;pqd=U(PM1^SmyBv|7`&q#dGziNL2M~SL2t5S$C2r z1|;`cD}Ltqg(cGSQf^(G5MLk@w64iP0kQ|qaW--NZ^2K2XNRk;u4>M5y$^4k_-6lf z1^4##e<$2_RrE2Ajvi-i4G29!<)?+#Uv~Tw1G`@u#`>FcDg4Pe;v?ZJeZt%*1ew=ezrx8m*nxbLL~SMopecT46Q zK{qX5?u$Y^if|h-r(Rb)t@#Q66=jIzo{^*wxqHc3r2$TzgyYag5Y7b8bVsg=o)CLML>Ui$t= zD=XH>Fil=Ab1dr~R$1m~T(8WWo=jqfjCsED2|5%Y2T)scH9A)Xbr^8c3FU2WwcgSj^Vz{hfZb%5^6g^2enS2>}OM?ZtKZ_uAY^O z*1C1^PM(h%RL(h|WN7h%!m-y$TDyv@Llc*kUVGk4?)&pa```6n(8Dj-pP#+3`drZb z2Xf*e@yx=f!@iyy|MG6!Eyi5;*mu3XE+r@>1+C?{puT!c%Z~Mx< zqB+lS<{R~ut5@^7+$qJ8mIm+qmThAsKW_$;YUHb-9iDFNq!4EclFIfjJ`_}($w2E& zK~N(W4V)_<51*n;H{vH9zIlFoRzz!ZkSNM`s?{GDG>3;e1G5#jbrOF5x=Zwn2p;oJ zk#=-A^qqTcL;Dju;q@~iZ$)?D>+k*^)He!%v6X@EHNWGZWwd|hL26C!SaNzAB5p-5#?45rNXSWe zCBI%+%N=PAo7pOnUCBk1+>tKyz2xXFp>)VWVpUM}?OMfS#`ROQMJoyKA}h5T!(+Q% za&mrFL07AI!?kAYffzZ^n1X_eqod0nXM%a}a3+;D} zG`~}o5kMX4tQ8bWon~Eh1>-LpjdHQuo;d}n?M7mlKZkI1FcXz8!{c$zrYa8 zQ2^aMYa=2|NT=4Ts6e4`x^pmBgGk_dV~~YFPb?IKG%_@8g6354;{^iQoJe9`c0>eL zY$cuptA&t8SNgV%Y1ct|ePhtuJOq8CC@u(LhBOko7%dO0 zz`eugSXGn3#^r2Awq4!x5DjW}BvZ(W@iUC@MNLCQEH6ohT1+S}oJ=Rv#M88pQs$Be zZkcL^AdHcW;V`4~ur3210FVMBo1|#^*#j4D+GXtGA1rwLWsaw3{E3w7@$@R zazH+UfT3!Bf>{$FL?k4(8Z7{yu*;rfXz(~8&ajH&9HK$YhOt29BuE;9D7Z|>%W4S# z2G)Tm*=$%gZdOD@ zms#KgJi>{i(Z=dDF+V;#zp@WYx@oOg6yw)}%c=)x^dbix)vY27xbWc`Dv=eEX5|~m zB~vt-11+LWBAkP5So8>ZkkJ^xVMWgE+2F^)K2wl1b(NVL@cmxFw=Cb zj(t{iXs|bb{v;^p$lZTRIWb?gbo;%!e{752)q3Q5Ez#xu`$9Pj$ruRtE&d_UU8k_? ztvT09@0rcEOUwHM|7Db%<)6PNJFd#@{N-G-BW`s)qbu&eTUlAzu*WqJR-s$=>Dht` zc6inxxfhue6Df zIHZgf&kY;^(gKW2tb+9pjWU45Y(EZw^Ms9{xPUn@9(W)vkV<(-L5PVPom@@)*W;2FKEZ|IS0yYK-R$meKlM8(@&`K_1 zrhqP(HiZUQ;o;j%af@g;*$RzdIZ}`oRW0iMEiTsXijFXP_SW%~P7j{tqcPr79B=h6 z3y<^#mvqDV#lxlbE5ym!gSP#(mj%*L3Xl#}3sw-d z>cWvCS|FjH-Ns^!1iL9>5yH4W^=t$^0N>!J;?@$(VhnUOUA#QUv+=12Z;r`;$4w2j z`(l0ADy@o@;Nml2wSo9Ho4l=~iUGhgzU31=z#{6sujO za9cm~9FfpZeZ|AX^NT8|kODe28zd6~>|^6>SRYOVA@eIH;dG1!zST$sv3Mk{ZrIP4 zuQC^fX@KK# zX5uM8VGKgVRHeH$`TF6}H5AlTVgk?x8(_4orVj*@C)I`HU=G$-)jt~!PeV4KEYam` z)+jJ_35>@hmLrgmett1$G};3}D`(fnW5u48RAAbR#rc%`gTHfFV&; z1FC<9t6r$)2N3knOoDKu?`eJ`!KNBqCP9G$4;6$Ig+OVr3oVhV#4I|i)wq~LuQTko z#j2rkPug3|g?L9M3p0YeOVnT(%+PNOn4;{E;-Q?tsaFA3aKaKqh~VP`bVRg@s44K@PqDx;yT*c!N{G zTm-*@p&V>_F#%s<5;!#^q+vK5n#SZvpeZ>UN+wt7!pLJVD!&}p5^W+&o$c=@f{-)} z3tefl6iDUPSM;<~>&pSKnLIkfpuQX%U~Qd<0WM-OR4xNJC(14_;-E|PwCIglx@9)B z5Ym#hF{zRd%)z1&v?A12cuWkR|00$5lSD{kv&8CkQ!@l`f*Qs0q7=R~#) zIkln7v}O+`D&e*U4yMS8WutbmR?`We$|sfB7_o_84sIck7N9dFm=g#B`6S4aUaO5S zvY>Pv$Ma+%94JWe=d?di{o!O$=`(+MbS zxxS1wtkF=CXxEP5wz}q`__X%|V-i^)2(V$bLRi2;G@Cldv&b1r5wDm;XwVy?#cSD! zDGqgNn}`-{O^$L zup5hlN3dEgKqXs=2t^OJ3h;C>#Ye!}qOA?vFgRK<%P(G-$_=t%;Y7J1Ww#Ay4<>AN z=*<+R$aMafoUE%>O3;_2)<(Ylmb6f-uLJ+ZltoE3g<^EhTl{~`D>W#KS zZ4p)@ZV-!_W3n7$eAtLIxGx5TXvEP2IRZK@qOa|n+z7O0$HrBt(qdhcpPzy z`*Qq-E|7`j>L=}UeKe9FgBF8nGnkp18^;htbBVzVt>7VU3tCe!ig;vRniB~IE*VZH zUSeS}02V8d2{9rfLb(Mw{$?ohVmWm06pm-Z;ItHpP1HpK0jnXna=M-*nig>xLtMh! zU?EmK#o6*cU{_420A7!46if-GSoI|)7ItjJoF$S~UpySgCn*ih(PI2CoXa(*Z74EP4#aUr7vQa0Hi$P{45M6i%Q76jEDFEEmO0 zTa8vQ92~?CM?H0xg1B^TXAPqu4#jA{k#?1+_dmGS5iZ?!x zrWQFh;f%)PgDqMiklIA!5<5#<3MvaP4oh3FdOB!b>J&gH1jWwoI9(qeqoaI?nj z7BNjc=@o&+m*ep9EKuBeZ?EXK;MBQg;;<~Po!47m65qUtkSnZ}X7*D_Dn9tt|A0musOQgn+7 zQtYPoIrOHNTAtqXcs=Y+Lan#j1$IwKx`ks3)JZPm*t_zvrvT*df$HL7QrBZ?39Snb z$hs)DcVN45eF5iEP(b5N{$o|rr}b&2G6tz19ok#cFteMq@3d1b=3%#Y2{C4bWku|} zVdb0=4%gGQ%jm-CxH}dC)f8NJJ>O||zE|#5n4)25%;uSUNl)*!2LBvuiOtBpguNSh z-iZ0Fnx%9F**rVz;IM`{cN=r}-1&OBEdg^HXTl;Tn>ckxpS1SyQOxnK+LG=cUG!!X z6-BT8*-;~kMopPs+S%rY^ti&K7m%;diNCXyl@gx3>e6=puQ>3tmV``0peZHdB|D;; ze~MUWI(ojnd&S+mh+R_!&>GgTt1s%=|7vhFXVkN4Z!S6Z22*j@HkE5uI)Vp2do&xz zhNZK3Pt2kcC}`7rAG?0t`}iVZ_MK?Co*T@#pYQy${z}Nvxf!hiW`+3;$-JQAV|ZG4 ze$wD0Lv6Jy4-H7buXz8LsfS(^o=pWGa8Y+3KWoz1QT737^`?q6-Mz;wmxt0tfBf_M zaO_=#)u##ig(rTGdX&(2F4sHBNTlGa(L>j_rB)2bToI8TzuJ3b2Xj9AH7}Nj-$=SM zf4y?5OKt6H!sgZW(F{uc+ag16xb)v+C+{T8Cm!Cp_&VT_fAdXmE)yOGYc1Q8A>s!H z-PrTy#({#yYsuf<@4Y|S`qJx8?)Rw}4Rx7RNA1*u9o-QpC4O265gXXJ{-R3{%R*2A z>Y?GLjHkLhZQ#0kv>+kpxj;cezsp=T3p)RFetX6`=n}<@7aAmg^S@h$&!iXIZfSQX zDca|@Jy9!|JvVh{;P=%eeXT1o&o@j_&FOn6m|F6=SMAHR9`L#I*vk@%ax_8Cuc~(?lO>yz6+PR6npD-pnZ&iNYPXBa#zH`S$=VF6|)9kU* zp1R6oV<-iv?OD)W6tm!6>IjYLAF6q7^7eS%?hyIN<~)cM-@A<%`E3!Wx*vB6wD4f? zit65`LjtO+-ujnKM27|P>X6p)*H$B||5;7?%zx+Jj2hN*8k%pdKa<>qL@H{ANL1?{ zx_rTBIeBw@^E1lL|4P3*M^+@9z>pR*&o@73#3tyW_eLKo%y&d!G;10fI*_tQN#5i1 zJVxhu`{|_gY1I0?G~DiAJTIi%s+BYKKA{}S1WuDFvimTy8&YjhLW?tuUmci)OIZdg?RJ9#DeR} zBpZeJwCPlGu4h#{Bx~zTGZKc;J!a7CDS`zFdAWL5isA zXi!Fv=d7+3Y&)u|$dxo8-MpBQ|0CO=$3CnI=YAZxd_QjCjCz|%KHG~fLw4LTlIlUB zAA#w_U6^xwsI2+8^YI?5d*@X}cc*t;nnXRZ+j$pUE}Leo4V-zZq8o16pwDzV|MX~1 z(c8C_7)J;cjm6jec|KKD-K%=9nDC2UkGu(oNQT9Eha{wct_+50 zQ!9ApS774{?kUYzD<4NcBPv?2zFPhUHFWY-{z$t6iFxFtZ1jD;54(>x5*%(6kkJu+ zL(n$-o{oOkUQsu35+sLbJl5d!MhfnP?54#_E4YbQOiDrK_ zH|e{V!ftm3pm&Po7sQjPb?#T0jrdlIUD6-gOmtsj6CzA!_4P;ZVNFc(F|mY_gVSo} zDy>aI3eooU4h3{dC1Zder2bCI9Q+pLgDiw>@^~Vb)TFa%AL%bH{jgVM*hwi3;BqQGt$E1Ur^LEn#uDbpE@) z=fatgT>Ej{qn_#&udnD%o2}7nn*q?s-W0>igAR_bmD^X-v2EMd9!t0@J=)gPLK*<; z7s(ldQ@+rVrB5sj)s63p+x;Llhb~$%Qr+MHCk@LKn2LJkuJR+O_ouWQYP6hydo(jY z^q67B11c$o;%JU0sNB+|T6yh}Rs7*d%*)4m%{?!*t?n8WE4e9M`gE^z@T$cAj#6)R zrl|&%bf@L~t8EJET{gwN{9@l*s-gfjcr#j7)}*B;@Ri(jOkV@NJ?B3O&C1q<@`D}> zA{UcGfyPLxzz=;a0ZtKvibvFjBg37^Y zMvx>+pL3g6(oZ2|YxE~98K8PRSVu)$uKyG|`0KW6{igj*PrHvHo16ss-G}s4zu6*a zU+wq$EnYos>Uh&xN~Xj_e*1Ix^tQ3w{qF1l)cHQiIa*!cAyW@F>ZTH9lHYga{`QzH?iRFqTG zALzm5QNe)#>4BMh64tWMWdab=U#{~6+drJh`g^c-=pQ!f`w43?@`Kdk*{0i2QT$Ks zZwFDkwv-mC5+q;>K=t&S@Pf2w?^<1Euexl{snOQb)z@`U)vMWc=@lI;d1!5aMw!pl zy85lhDTJht>>svsJ4b9xcT_dahd%#Y)>oack~?~idrL>c<>kio!<)6`0Kd=w5}VQe zKU(05RWYEGTT-Lznn4m;;zbL9-r%uvhZEb&wBS3@+rv*Tu&?ZFV9Pz)mVKtC>A}Lr zk;+O(WytL8wpryW#Icwu136&5sEM#$p7>BfyP(R{ELP&OxrDO>(NNZi>*`gM`$ERq z{@yM%_ztT&Wwm_g^b8fHolNBESxi$$KdC&{x5VH;4#&scT?ZH>b5r8*#?y-jU8@bX zsrh>Qy_J=1Z3et{nn_3nlu03o7mbS-P0d#8Q15$IP6~hBnn#>m9qHt4`}K_ZLU?KQ zbII%sX#cmDMu+#kI}&;@biAuFVl%HJU+U=*8j};w(^>o_^aV~XW99Z z_Wx>=OAs3$Uu1kXi~F=ZTotmmy^_VN?>l-h=1K0Ah^d*52{I1tliXVTC&)v0MpKm7 zyZh*Q!F1f-BOiZRefbo@e0qM$i`P1w`|;uJ7oL!{MNdinlfg4+lGSd|8#m8=_V%|ud&a-=hq3-^!yLc&H?6Z^{nyUz z{q@v1{i<%!rsPJ_m6D`8N>AsHU%s2!*%kY`&;D%YyN2cCA^G1kRz*VLs|c6A<jdl63w0{o4AH@@E@!m)G|kA02&Wrv3O{y|~8y=OroLt6S}R zWQS9<{N7!9QkELvTae-@BXjseq|P?}sJX<+gaXn$2UOo;TdmxD{U>QGld@8>0! z@AQu6%Ghu^A<=e-ks=ApCL_t~w2{*)yU$CJKj18t=g^B1nbXQ%?-!gs`lH>c9otRy z;9x)EYYn7T(%@OSpI#@BRF_|8;G zes?+gX)CCHAnJ)q-rJ7jOOH2fug^zqB)|9vN-lZ!^FVBj#^!p5^3I+eh#MjkmeN1_ z$-l|to1mePiThUN3*6@Y5{2D=YVKZJ=#h_gUH7C$sH(m?PE6oQ=f&0Jx_xC(UCK6` z-*x|{L^Q6Kx^Kz&G~<^19jX0 literal 0 HcmV?d00001 diff --git a/man/liger-demodata.Rd b/man/liger-demodata.Rd new file mode 100644 index 00000000..1c21eb8e --- /dev/null +++ b/man/liger-demodata.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{ctrl} +\alias{ctrl} +\alias{stim} +\title{dgCMatrix object of PBMC subsample data with Control and Stimulated datasets} +\format{ +\code{dgCMatrix} object of gene expression matrix from PBMC study, + named by "ctrl" and "stim". + +An object of class \code{dgCMatrix} with 262 rows and 300 columns. +} +\source{ +https://www.nature.com/articles/nbt.4042 +} +\usage{ +ctrl + +stim +} +\description{ +dgCMatrix object of PBMC subsample data with Control and Stimulated datasets +} +\references{ +Hyun Min Kang and et. al., Nature Biotechnology, 2018 +} +\keyword{datasets} diff --git a/man/read10X.Rd b/man/read10X.Rd index 62561ad6..6e57bc11 100644 --- a/man/read10X.Rd +++ b/man/read10X.Rd @@ -44,7 +44,7 @@ considered scRNA-seq data (default 'rna', alternatives: 'atac').} } \value{ List of merged matrices across data types (returns sparse matrix if only one data type - detected), or nested list of matrices organized by sample if merge=F. + detected), or nested list of matrices organized by sample if merge = FALSE. } \description{ This function generates a sparse matrix (genes x cells) from the data generated by 10X's diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index 55b6d479..1ca1a0df 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -16,7 +16,7 @@ selectGenes( do.plot = FALSE, cex.use = 0.3, chunk = 1000, - unshared = F, + unshared = FALSE, unshared.datasets = NULL, unshared.thresh = NULL ) From 06df37cc39682be110fdd0066de49605fbacfb21 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Tue, 7 Nov 2023 12:19:57 -0500 Subject: [PATCH 6/8] Fixes for 1.0.1, addressing feedback from CRAN team --- .Rbuildignore | 2 + DESCRIPTION | 13 +- NEWS.md | 8 + R/rliger.R | 330 ++++++++++++++++----------------- R/utilities.R | 177 ++++++++---------- man/calcARI.Rd | 19 +- man/calcAgreement.Rd | 16 +- man/calcAlignment.Rd | 11 +- man/calcAlignmentPerCluster.Rd | 14 +- man/calcDatasetSpecificity.Rd | 13 +- man/calcNormLoadings.Rd | 10 + man/calcPurity.Rd | 19 +- man/convertOldLiger.Rd | 3 +- man/createLiger.Rd | 5 +- man/getFactorMarkers.Rd | 13 +- man/getGeneValues.Rd | 12 +- man/getProportionMito.Rd | 12 +- man/imputeKNN.Rd | 1 + man/ligerToSeurat.Rd | 12 +- man/linkGenesAndPeaks.Rd | 1 + man/louvainCluster.Rd | 12 +- man/makeInteractTrack.Rd | 1 + man/makeRiverplot.Rd | 1 + man/nnzeroGroups.Rd | 27 --- man/nonneg.Rd | 20 -- man/normalize.Rd | 5 +- man/online_iNMF.Rd | 13 +- man/optimizeALS.Rd | 12 +- man/optimizeNewData.Rd | 32 +++- man/optimizeNewK.Rd | 12 +- man/optimizeNewLambda.Rd | 11 +- man/optimizeSubset.Rd | 19 +- man/plotByDatasetAndCluster.Rd | 18 +- man/plotClusterFactors.Rd | 13 +- man/plotClusterProportions.Rd | 10 +- man/plotFactors.Rd | 15 +- man/plotFeature.Rd | 15 +- man/plotGene.Rd | 18 +- man/plotGeneLoadings.Rd | 18 +- man/plotGeneViolin.Rd | 17 +- man/plotGenes.Rd | 16 +- man/plotWordClouds.Rd | 17 +- man/quantile_norm.Rd | 13 +- man/rank_matrix.Rd | 23 --- man/readSubset.Rd | 8 +- man/removeMissingObs.Rd | 8 +- man/reorganizeLiger.Rd | 10 +- man/runGSEA.Rd | 13 +- man/runTSNE.Rd | 13 +- man/runUMAP.Rd | 15 +- man/runWilcoxon.Rd | 29 ++- man/scaleNotCenter.Rd | 6 +- man/selectGenes.Rd | 8 +- man/seuratToLiger.Rd | 17 +- man/show-methods.Rd | 4 + man/subsetLiger.Rd | 8 +- man/suggestK.Rd | 10 +- man/suggestLambda.Rd | 10 +- man/sumGroups.Rd | 27 --- 59 files changed, 574 insertions(+), 661 deletions(-) create mode 100644 NEWS.md delete mode 100644 man/nnzeroGroups.Rd delete mode 100644 man/nonneg.Rd delete mode 100644 man/rank_matrix.Rd delete mode 100644 man/sumGroups.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 3de928b2..928274cc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +^LICENSE$ ^\.lintr$ ^\.vscode ^.*\.Rproj$ @@ -7,6 +8,7 @@ ^_config\.yml$ ^appveyor\.yml$ ^vignettes/.*html$ +^vignettes/articles ^vignettes/Integrating_multi_scRNA_data\.rmd$ ^vignettes/Integrating_scRNA_and_scATAC_data\.Rmd$ ^vignettes/Parameter_selection\.Rmd$ diff --git a/DESCRIPTION b/DESCRIPTION index 0fb2423d..09c3de78 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ Author: Joshua Welch [aut, ctb], Maintainer: Yichen Wang BugReports: https://github.com/welch-lab/liger/issues URL: https://github.com/welch-lab/liger -License: GPL-3 | file LICENSE +License: GPL-3 Imports: Rcpp (>= 0.12.10), plyr, FNN, @@ -44,22 +44,23 @@ Imports: Rcpp (>= 0.12.10), parallel, doParallel, mclust, - stats, psych, RANN, uwot, rlang, - utils, hdf5r, - scattermore (>= 0.7) + scattermore (>= 0.7), + patchwork, + cowplot biocViews: LazyData: true LinkingTo: Rcpp, RcppArmadillo, RcppEigen, RcppProgress Depends: - cowplot, + R (>= 3.4), Matrix, methods, - patchwork + stats, + utils RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..1de58af0 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,8 @@ +## rliger 1.0.1 + +- Allow setting mito pattern in `getMitoProportion()` #271 +- Fix efficiency issue when taking the log of norm.data (e.g. `runWilcoxon`) +- Add runable examples to all exported functions when possible +- Fix typo in online_iNMF matrix initialization +- Other minor fixes + diff --git a/R/rliger.R b/R/rliger.R index a77633ab..f09e69e9 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -84,7 +84,9 @@ liger <- methods::setClass( #' @aliases show,liger-method #' @docType methods #' @rdname show-methods - +#' @examples +#' ligerex <- createLiger(list(ctrl = ctrl)) +#' show(ligerex) setMethod( f = "show", signature = "liger", @@ -2060,7 +2062,7 @@ online_iNMF <- function(object, #' @param x Dense matrix. #' @param eps Threshold. Should be a small positive value. (default 1e-16) #' @return Dense matrix with smallest values equal to eps. - +#' @noRd nonneg <- function(x, eps = 1e-16) { x[x < eps] = eps return(x) @@ -2112,7 +2114,7 @@ nonneg <- function(x, eps = 1e-16) { #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) #' # Minimum specification for fast example pass -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) optimizeALS <- function( object, ... @@ -2425,9 +2427,9 @@ optimizeALS.liger <- function( #' ligerex <- scaleNotCenter(ligerex) #' k <- 5 #' # Minimum specification for fast example pass -#' ligerex <- optimizeALS(ligerex, k = k, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = k, max.iters = 1) #' if (k != 5) { -#' ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 2) +#' ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 1) #' } optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { @@ -2540,14 +2542,14 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters #' \donttest{ #' # Assume we are performing the factorization #' # Specification for minimal example test time, not converging -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' # Suppose we have new data, namingly Y_new and Z_new from the same cell type. #' # Add it to existing datasets. #' new_data <- list(Y_set = ctrl, Z_set = stim) #' # 2 iters do not lead to converge, it's for minimal test time #' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, #' which.datasets = list('ctrl', 'stim'), -#' max.iters = 2) +#' max.iters = 1) #' # acquire new data from different cell type (X), we'll just add another dataset #' # it's probably most similar to ctrl #' X <- ctrl @@ -2555,7 +2557,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters #' ligerex3 <- optimizeNewData(ligerex, new.data = list(x_set = X), #' which.datasets = list('ctrl'), #' add.to.existing = FALSE, -#' max.iters = 2) +#' max.iters = 1) #' } optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = TRUE, lambda = NULL, thresh = 1e-4, max.iters = 100, verbose = TRUE) { @@ -2663,14 +2665,14 @@ optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = #' \donttest{ #' # Assume we are performing the factorization #' # Specification for minimal example run time, not converging. -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' # Preparing subset with random sampling. #' # Subset can also be obtained with prior knowledge from metadata. #' cell_names_1 <- sample(rownames(ligerex@H[[1]]), 20) #' cell_names_2 <- sample(rownames(ligerex@H[[2]]), 20) #' #' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2), -#' max.iters = 2) +#' max.iters = 1) #' } optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, lambda = NULL, thresh = 1e-4, max.iters = 100, datasets.scale = NULL) { @@ -2738,9 +2740,9 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la #' \donttest{ #' # Assume we are performing the factorization #' # Specification for minimal example run time, not converging. -#' ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 1) #' # decide to run with lambda = 15 instead (keeping k the same) -#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 2) +#' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 1) #' } optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { k <- ncol(object@H[[1]]) @@ -2797,11 +2799,7 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' \donttest{ -#' # examine plot for most appropriate lambda, use multiple cores for faster results -#' # This will take a long time -#' suggestLambda(ligerex, k = 20, num.cores = 4) -#' } +#' suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -2938,11 +2936,7 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' \dontrun{ -#' # examine plot for most appropriate k, use multiple cores for faster results -#' # This will take a long time -#' suggestK(ligerex, num.cores = 4) -#' } +#' suggestK(ligerex, k.test = c(5,6), max.iters = 1) suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, max.iters = 100, num.cores = 1, rand.seed = 1, gen.new = FALSE, nrep = 1, plot.log2 = TRUE, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -3079,7 +3073,7 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) quantile_norm <- function( object, @@ -3254,9 +3248,11 @@ quantile_norm.liger <- function( #' ligerex <- louvainCluster(ligerex, resolution = 0.3) louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps = 0.1, nRandomStarts = 10, nIterations = 100, random.seed = 1, verbose = TRUE, dims.use = NULL) { + tmpdir <- tempdir() output_path <- paste0('edge_', sub('\\s', '_', Sys.time()), '.txt') output_path = sub(":","_",output_path) output_path = sub(":","_",output_path) + output_path <- file.path(tmpdir, output_path) if (is.null(dims.use)) { use_these_factors <- 1:ncol(object@H[[1]]) @@ -3984,6 +3980,7 @@ runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gen #' ligerex <- scaleNotCenter(ligerex) #' # Specification for minimal example run time, not converging #' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), use.pca = FALSE, perplexity = 30, theta = 0.5, method = "Rtsne", fitsne.path = NULL, @@ -4055,13 +4052,13 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' #' @export #' @examples -#' \dontrun{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) #' # Specification for minimal example run time, not converging #' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex) runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { @@ -4521,18 +4518,21 @@ calcPurity <- function(object, classes.compare, verbose = TRUE) { #' #' @param object \code{liger} object. #' @param use.norm Whether to use cell normalized data in calculating contribution (default FALSE). -#' @param species Whether the data is from mouse or human? (default "mouse"). +#' @param mito.pattern Regex pattern for identifying mitochondrial genes. Default "^mt-" typically goes for mouse. +#' May use "^MT-" for human. #' @return Named vector containing proportion of mitochondrial contribution for each cell. #' #' @export #' @examples #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) -#' ligerex@cell.data$mito <- getProportionMito(ligerex, species = "human") -getProportionMito <- function(object, use.norm = FALSE, species = c("mouse", "human")) { - species <- match.arg(species) - ptrn <- switch(species, mouse = "^mt-", human = "^MT-") +#' # Expect a warning because the test data does not contain mito genes +#' ligerex@cell.data$mito <- getProportionMito(ligerex, mito.pattern = "^MT-") +getProportionMito <- function(object, use.norm = FALSE, mito.pattern = "^mt-") { all.genes <- Reduce(union, lapply(object@raw.data, rownames)) - mito.genes <- grep(pattern = ptrn, x = all.genes, value = TRUE) + mito.genes <- grep(pattern = mito.pattern, x = all.genes, value = TRUE) + if (length(mito.genes) == 0) { + warning("No mito genes identified with pattern \"", mito.pattern, "\". ") + } data.use <- object@raw.data if (use.norm) { data.use <- object@norm.data @@ -4583,16 +4583,15 @@ getProportionMito <- function(object, use.norm = FALSE, species = c("mouse", "hu #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # get tsne.coords for normalized data -#' ligerex <- runTSNE(ligerex) -#' # plot to console -#' plotByDatasetAndCluster(ligerex) -#' # return list of plots -#' plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- louvainCluster(ligerex) +#' plotByDatasetAndCluster(ligerex, pt.size = 1) plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, axis.labels = NULL, do.legend = TRUE, legend.size = 5, @@ -4726,14 +4725,14 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete -#' # get tsne.coords for normalized data -#' ligerex <- runTSNE(ligerex) -#' # plot nUMI to console -#' plotFeature(ligerex, feature = 'nUMI') -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotFeature(ligerex, "nUMI", pt.size = 1) plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, title = NULL, pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, do.labels = FALSE, axis.labels = NULL, do.legend = TRUE, legend.size = 5, option = 'plasma', @@ -4849,17 +4848,15 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) #' ligerex <- quantile_norm(ligerex) -#' # get tsne.coords for normalized data -#' ligerex <- runTSNE(ligerex) -#' # factor plots into pdf file -#' # pdf("plot_factors.pdf") #' plotFactors(ligerex) -#' # dev.off() -#' } - +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotFactors(ligerex, plot.tsne = TRUE) plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = FALSE, verbose = TRUE) { k <- ncol(object@H.norm) if (verbose) { @@ -4945,18 +4942,14 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runTSNE(ligerex) -#' # pdf('word_clouds.pdf') -#' plotWordClouds(ligerex, num.genes = 20) -#' # dev.off() -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' plotWordClouds(ligerex, num.genes = 20) -#' } - +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotWordClouds(ligerex, do.spec.plot = FALSE) plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = 30, min.size = 1, max.size = 4, factor.share.thresh = 10, log.fc.thresh = 1, pval.thresh = 0.05, do.spec.plot = TRUE, return.plots = FALSE, verbose = TRUE) { @@ -5103,18 +5096,14 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex) -#' # pdf("gene_loadings.pdf") -#' plotGeneLoadings(ligerex, num.genes = 20) -#' # dev.off() -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' plotGeneLoadings(ligerex, num.genes = 20) -#' } -#' +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes.show = 12, num.genes = 30, mark.top.genes = TRUE, factor.share.thresh = 10, log.fc.thresh = 1, umi.thresh = 30, frac.thresh = 0, @@ -5327,15 +5316,15 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' # plot expression for CD4 and return plots -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex) +#' plotGeneViolin(ligerex, "CD74", by.dataset = FALSE) +#' plotGeneViolin(ligerex, "CD74") plotGeneViolin <- function(object, gene, methylation.indices = NULL, by.dataset = TRUE, return.plots = FALSE) { if (class(object@raw.data[[1]])[1] == "H5File"){ @@ -5345,8 +5334,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, } gene_vals <- c() - gene_df <- data.frame(object@tsne.coords) - rownames(gene_df) <- names(object@clusters) + gene_df <- data.frame(Clusters = object@clusters) for (i in 1:length(object@raw.data)) { if (class(object@raw.data[[i]])[1] == "H5File"){ @@ -5354,7 +5342,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, gene_vals <- c(gene_vals, object@sample.data[[i]][gene, ]) } else { if (gene %in% rownames(object@sample.data[[i]])) { - gene_vals_int <- log2(10000 * object@sample.data[[i]][gene, ] + 1) + gene_vals_int <- log1p(10000 * object@sample.data[[i]][gene, ]) } else { gene_vals_int <- rep(list(0), ncol(object@sample.data[[i]])) @@ -5367,7 +5355,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, gene_vals <- c(gene_vals, object@norm.data[[i]][gene, ]) } else { if (gene %in% rownames(object@norm.data[[i]])) { - gene_vals_int <- log2(10000 * object@norm.data[[i]][gene, ] + 1) + gene_vals_int <- log1p(10000 * object@norm.data[[i]][gene, ]) } else { gene_vals_int <- rep(list(0), ncol(object@norm.data[[i]])) @@ -5379,22 +5367,21 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, } gene_df$Gene <- as.numeric(gene_vals[rownames(gene_df)]) - colnames(gene_df) <- c("Dim1", "Dim2", "gene") gene_plots <- list() for (i in 1:length(object@scale.data)) { if (by.dataset) { - gene_df.sub <- gene_df[rownames(object@H[[i]]), ] - gene_df.sub$Cluster <- object@clusters[rownames(object@H[[i]])] - title <- names(object@scale.data)[i] + gene_df.sub <- gene_df[colnames(object@norm.data[[i]]), ] + gene_df.sub$Cluster <- object@clusters[colnames(object@norm.data[[i]])] + title <- names(object@norm.data)[i] } else { gene_df.sub <- gene_df gene_df.sub$Cluster <- object@clusters title <- "All Datasets" } - max_v <- max(gene_df.sub["gene"], na.rm = TRUE) - min_v <- min(gene_df.sub["gene"], na.rm = TRUE) + max_v <- max(gene_df.sub["Gene"], na.rm = TRUE) + min_v <- min(gene_df.sub["Gene"], na.rm = TRUE) midpoint <- (max_v - min_v) / 2 - plot_i <- ggplot(gene_df.sub, aes_string(x = "Cluster", y = "gene", fill = "Cluster")) + + plot_i <- ggplot(gene_df.sub, aes_string(x = "Cluster", y = "Gene", fill = "Cluster")) + geom_boxplot(position = "dodge", width = 0.4, outlier.shape = NA, alpha = 0.7) + geom_violin(position = "dodge", alpha = 0.7) + ggtitle(title) @@ -5468,17 +5455,14 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), factorization complete -#' ligerex -#' ligerex <- runTSNE(ligerex) -#' # plot expression for CD4 and return plots -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input -#' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotGene(ligerex, "CD74", pt.size = 1) plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by = 'dataset', log2scale = NULL, methylation.indices = NULL, plot.by = 'dataset', set.dr.lims = FALSE, pt.size = 0.1, min.clip = NULL, max.clip = NULL, @@ -5727,15 +5711,14 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input -#' ligerex <- runTSNE(ligerex) -#' # plot expression for CD4 and FCGR3A -#' # pdf("gene_plots.pdf") -#' plotGenes(ligerex, c("CD4", "FCGR3A")) -#' # dev.off() -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) plotGenes <- function(object, genes, ...) { for (i in 1:length(genes)) { print(genes[i]) @@ -5779,6 +5762,7 @@ plotGenes <- function(object, genes, ...) { #' @export #' @examples #' \dontrun{ +#' # Riverplot currently archived, cannot run this example #' # ligerex (liger object), factorization complete input #' # toy clusters #' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) @@ -5788,7 +5772,6 @@ plotGenes <- function(object, genes, ...) { #' # create riverplot #' makeRiverplot(ligerex, cluster1, cluster2) #' } - makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, min.frac = 0.05, min.cells = 10, river.yscale = 1, river.lty = 0, river.node_margin = 0.1, label.cex = 1, label.col = "black", lab.srt = 0, river.usr = NULL, @@ -5929,13 +5912,14 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) #' ligerex <- quantile_norm(ligerex) -#' # plot cluster proportions +#' ligerex <- louvainCluster(ligerex) #' plotClusterProportions(ligerex) -#' } - plotClusterProportions <- function(object, return.plot = FALSE) { sample_names <- unlist(lapply(seq_along(object@H), function(i) { @@ -5997,12 +5981,14 @@ plotClusterProportions <- function(object, return.plot = FALSE) { #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input -#' # plot expression for CD4 and return plots -#' loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- louvainCluster(ligerex) +#' plotClusterFactors(ligerex) plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "Rowv", col = NULL, return.data = FALSE, ...) { if (use.aligned) { @@ -6071,13 +6057,13 @@ plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "R #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object), factorization complete input -#' markers <- getFactorMarkers(ligerex, num.genes = 10) -#' # look at shared markers -#' head(markers[[2]]) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- quantile_norm(ligerex) +#' fm <- getFactorMarkers(ligerex, dataset1 = "stim", dataset2 = "ctrl") getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.share.thresh = 10, dataset.specificity = NULL, log.fc.thresh = 1, pval.thresh = 0.05, num.genes = 30, print.genes = FALSE, verbose = TRUE) { @@ -6255,11 +6241,16 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets ONLY), factorization complete input -#' s.object <- ligerToSeurat(ligerex) +#' if (requireNamespace("Seurat")) { +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' srt <- ligerToSeurat(ligerex) #' } - ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use.liger.genes = TRUE, by.dataset = FALSE) { if (!requireNamespace("Seurat", quietly = TRUE)) { @@ -6340,7 +6331,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use if (use.liger.genes) { Seurat::VariableFeatures(new.seurat) <- var.genes } - Seurat::SetAssayData(new.seurat, slot = "scale.data", t(scale.data), assay = "RNA") + Seurat::SetAssayData(new.seurat, slot = "scale.data", new.data = t(scale.data), assay = "RNA") new.seurat[['tsne']] <- tsne.obj new.seurat[['inmf']] <- inmf.obj Seurat::Idents(new.seurat) <- ident.use @@ -6395,20 +6386,12 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use #' #' @export #' @examples -#' \dontrun{ -#' # Seurat objects for two pbmc datasets -#' tenx <- readRDS('tenx.RDS') -#' seqwell <- readRDS('seqwell.RDS') -#' # create liger object, using project names -#' ligerex <- seuratToLiger(list(tenx, seqwell)) -#' # create liger object, passing in names explicitly, using hvg.info genes -#' ligerex2 <- seuratToLiger(list(tenx, seqwell), names = c('tenx', 'seqwell'), num.hvg.info = 2000) -#' # Seurat object for joint analysis -#' pbmc <- readRDS('pbmc.RDS') -#' # create liger object, using 'protocol' for dataset names -#' ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) +#' if (requireNamespace("Seurat")) { +#' ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") +#' stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") +#' ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), +#' use.seurat.genes = FALSE) # because no var.gene now #' } - seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-projects", meta.var = NULL, assays.use = NULL, raw.assay = "RNA", remove.missing = TRUE, renormalize = TRUE, use.seurat.genes = TRUE, num.hvg.info = NULL, use.idents = TRUE, use.tsne = TRUE, @@ -6616,13 +6599,8 @@ seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-project #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory datasets), with clusters 0:10 -#' # factorization, alignment, and t-SNE calculation have been performed -#' # subset by clusters -#' ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' lig.small <- subsetLiger(ligerex, cells.use = c(colnames(ctrl)[1:100], colnames(stim)[1:100])) subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.missing = TRUE) { if (!is.null(clusters.use)) { cells.use <- names(object@clusters)[which(object@clusters %in% clusters.use)] @@ -6688,13 +6666,10 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi #' #' @export #' @examples -#' \dontrun{ -#' # ligerex (liger object based on in-memory objects) organized by species -#' # with column designating sex in cell.data -#' # rearrange by sex -#' ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") -#' } - +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' # Create a random variable of two categories +#' ligerex@cell.data$foo <- factor(sample(c(1,2), 600, replace = TRUE)) +#' ligerexFoo <- reorganizeLiger(ligerex, "foo") reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "orig.dataset", ...) { if (!(by.feature %in% colnames(object@cell.data))) { @@ -6703,7 +6678,7 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o if(!is.factor(object@cell.data[, by.feature])){ stop("Error: cell.data feature must be of class 'factor' to reorganize object. Please change column to factor and re-run reorganizeLiger") } - if (!is.null(object@clusters)) { + if (length(object@clusters) > 0) { object@cell.data[['orig.clusters']] <- object@clusters } orig.data <- object@cell.data @@ -6749,11 +6724,9 @@ reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "o #' @export #' @examples #' \dontrun{ -#' # analogy (old Analogizer object) -#' # convert to latest class definition +#' # Not able to generate old object from current version, thus not run #' ligerex <- convertOldLiger(analogy) #' } - convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { new.liger <- createLiger(object@raw.data) slots_new <- slotNames(new.liger) @@ -7048,6 +7021,15 @@ optimize_UANLS = function(object, k=30,lambda= 5, max.iters=30,nrep=1,thresh=1e- #' @param object \code{liger} object. Should call quantileNorm before calling. #' @return A dataframe, such that each column represents the contribution of a specific matrix (W, V_1, V_2, etc. ) #' @export +#' @examples +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' # Minimum specification for fast example pass +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' calcNormLoadings(ligerex) calcNormLoadings = function(object) { H_norm = object@H.norm W_norm = object@W diff --git a/R/utilities.R b/R/utilities.R index ac01d101..abfc2ce1 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -2,6 +2,8 @@ #' @importFrom grDevices heat.colors #' @importFrom methods as is #' @importFrom rlang .data +#' @importFrom FNN get.knnx +#' @importFrom stats p.adjust NULL # Utility functions for iiger methods. Some published, some not. @@ -11,7 +13,7 @@ fplot = function(tsne,NMFfactor,title,cols.use=heat.colors(10),pt.size=0.7,pch.u data.cut=as.numeric(as.factor(cut(as.numeric(NMFfactor),breaks=length(cols.use)))) data.col=rev(cols.use)[data.cut] plot(tsne[,1],tsne[,2],col=data.col,cex=pt.size,pch=pch.use,main=title) - + } # Binds list of matrices row-wise (vertical stack) @@ -22,7 +24,6 @@ rbindlist = function(mat_list) # helper function for calculating KL divergence from uniform distribution # (related to Shannon entropy) for factorization -#' @importFrom methods is kl_divergence_uniform = function(object, Hs=NULL) { if (is.null(Hs)) {Hs = object@H} @@ -31,7 +32,7 @@ kl_divergence_uniform = function(object, Hs=NULL) dataset_list = list() for (i in 1:length(Hs)) { scaled = scale(Hs[[i]], center=FALSE, scale=TRUE) - + inflated = t(apply(scaled, 1, function(x) { replace(x, x == 0, 1e-20) })) @@ -42,11 +43,11 @@ kl_divergence_uniform = function(object, Hs=NULL) return(dataset_list) } -# Function takes in a list of DGEs, with gene rownames and cell colnames, +# Function takes in a list of DGEs, with gene rownames and cell colnames, # and merges them into a single DGE. # Also adds library.names to cell.names if expected to be overlap (common with 10X barcodes) MergeSparseDataAll <- function(datalist, library.names = NULL) { - + # Use summary to convert the sparse matrices into three-column indexes where i are the # row numbers, j are the column numbers, and x are the nonzero entries col_offset <- 0 @@ -55,11 +56,11 @@ MergeSparseDataAll <- function(datalist, library.names = NULL) { for (i in 1:length(datalist)) { curr <- datalist[[i]] curr_s <- summary(curr) - + # Now, alter the indexes so that the two 3-column matrices can be properly merged. # First, make the current and full column numbers non-overlapping. curr_s[, 2] <- curr_s[, 2] + col_offset - + # Update full cell names if (!is.null(library.names)) { cellnames <- paste0(library.names[i], "_", colnames(curr)) @@ -67,13 +68,13 @@ MergeSparseDataAll <- function(datalist, library.names = NULL) { cellnames <- colnames(curr) } allCells <- c(allCells, cellnames) - + # Next, change the row (gene) indexes so that they index on the union of the gene sets, # so that proper merging can occur. idx <- match(rownames(curr), allGenes) newgenescurr <- idx[curr_s[, 1]] curr_s[, 1] <- newgenescurr - + # Now bind the altered 3-column matrices together, and convert into a single sparse matrix. if (!exists("full_mat")) { full_mat <- curr_s @@ -120,7 +121,7 @@ sparse.var = function(x){ sparse.transpose = function(x){ h = summary(x) sparseMatrix(i = h[,2],j=h[,1],x=h[,3]) - + } # After running modularity clustering, assign singleton communities to the mode of the cluster @@ -137,8 +138,6 @@ assign.singletons <- function(object, idents, k.use = 15, center = FALSE) { )) } -#' @importFrom FNN get.knnx -# assign.singletons.list <- function(object, idents, k.use = 15, center = FALSE) { if (!is.list(x = object) || !all(sapply(X = object, FUN = is.matrix))) { stop("'assign.singletons.list' expects a list of matrices") @@ -187,20 +186,20 @@ assign.singletons.list <- function(object, idents, k.use = 15, center = FALSE) { return(idents) } -# Run modularity based clustering on edge file -SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R=1, - modularity=1, ModularityJarFile="",random.seed=1, +# Run modularity based clustering on edge file +SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R=1, + modularity=1, ModularityJarFile="",random.seed=1, id.number=NULL, print.mod=F) { - + # Prepare data for modularity based clustering edge = edge[which(edge[,3]>prune.thresh),] - + message("making edge file.") edge_file <- paste0("edge", id.number, fileext=".txt") # Make sure no scientific notation written to edge file # restore default settings when the current function exits - init_option <- options() + init_option <- options() on.exit(options(init_option)) saveScipen=options(scipen=10000)[[1]] @@ -213,7 +212,7 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= } liger.dir <- system.file(package = "rliger") ModularityJarFile <- paste0(liger.dir, "/java/ModularityOptimizer.jar") - command <- paste("java -jar", ModularityJarFile, edge_file, output_file, modularity, R, + command <- paste("java -jar", ModularityJarFile, edge_file, output_file, modularity, R, algorithm, nstart, iter.max, random.seed, as.numeric(print.mod), sep = " ") message ("Starting SLM") @@ -223,7 +222,7 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= } unlink(edge_file) ident.use <- factor(read.table(file = output_file, header = FALSE, sep = "\t")[, 1]) - + return(ident.use) } @@ -232,7 +231,7 @@ scaleL2norm <- function(x) { return(x / sqrt(sum(x^2))) } -# get mode of identities +# get mode of identities getMode <- function(x, na.rm = FALSE) { if(na.rm){ x = x[!is.na(x)] @@ -242,8 +241,8 @@ getMode <- function(x, na.rm = FALSE) { } # utility function for seuratToLiger function -# Compares colnames in reference matrix1 and adds back any missing -# column names to matrix.subset as rows +# Compares colnames in reference matrix1 and adds back any missing +# column names to matrix.subset as rows # Set transpose = TRUE if rownames of matrix1 should be referenced addMissingCells <- function(matrix1, matrix.subset, transpose = F) { if (transpose) { @@ -264,15 +263,15 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' Get gene expression values from list of expression matrices. #' #' @description -#' Returns single vector of gene values across all datasets in list provided. Data can be in raw, -#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. +#' Returns single vector of gene values across all datasets in list provided. Data can be in raw, +#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. #' #' @param list List of gene x cell (or cell x gene) matrices #' @param gene Gene for which to return values (if gene is not found in appropriate dimnames will #' return vector of NA). -#' @param use.cols Whether to query columns for desired gene (set to TRUE if matrices are cell x +#' @param use.cols Whether to query columns for desired gene (set to TRUE if matrices are cell x #' gene) (default FALSE). -#' @param methylation.indices Indices of datasets with methylation data (never log2scaled) +#' @param methylation.indices Indices of datasets with methylation data (never log2scaled) #' (default NULL). #' @param log2scale Whether to log2+1 scale (with multiplicative factor) values (default FALSE). #' @param scale.factor Scale factor to use with log2 scaling (default 10000). @@ -280,19 +279,14 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' @return Plots to console (1-2 pages per factor) #' @export #' @examples -#' \dontrun{ -#' # liger object with factorization complete -#' # ligerex -#' gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') -#' } - +#' NKG7 <- getGeneValues(list(ctrl = ctrl, stim = stim), "NKG7") getGeneValues <- function(list, gene, use.cols = FALSE, methylation.indices = NULL, log2scale = FALSE, scale.factor = 10000) { gene_vals <- unlist(lapply(seq_along(list), function(i) { mtx <- unname(list)[[i]] if (use.cols) { mtx <- t(mtx) - } + } if (gene %in% rownames(mtx)) { gene_vals_int <- mtx[gene, ] } else { @@ -301,7 +295,7 @@ getGeneValues <- function(list, gene, use.cols = FALSE, methylation.indices = NU } if (log2scale & !(i %in% methylation.indices)) { gene_vals_int <- log2(scale.factor * gene_vals_int + 1) - } + } return(gene_vals_int) }), use.names = TRUE) @@ -331,7 +325,6 @@ refine_clusts_knn = function(H,clusts,k,eps=0.1) ################################## For fast Wilcoxon test ################################ # helper function for wilcoxon tests on general variables like matrix and dgCMatrix # related to function runWilcoxon -#' @importFrom stats p.adjust wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { ## Check and possibly correct input values if (is(X, 'dgeMatrix')) X <- as.matrix(X) @@ -347,22 +340,22 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { y <- y[idx_use] X <- X[, idx_use] } - - + + y <- factor(y) idx_use <- which(!is.na(y)) if (length(idx_use) < length(y)) { y <- y[idx_use] X <- X[, idx_use] - if (verbose) - message('Removing NA values from labels') + if (verbose) + message('Removing NA values from labels') } - + group.size <- as.numeric(table(y)) if (length(group.size[group.size > 0]) < 2) { stop('Must have at least 2 groups defined.') } - + # features_use <- which(apply(!is.na(X), 1, all)) # if (verbose & length(features_use) < nrow(X)) { # message('Removing features with NA values') @@ -371,27 +364,27 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { if (is.null(row.names(X))) { row.names(X) <- paste0('Feature', seq_len(nrow(X))) } - + ## Compute primary statistics group.size <- as.numeric(table(y)) n1n2 <- group.size * (ncol(X) - group.size) if (is(X, 'dgCMatrix')) { - rank_res <- rank_matrix(Matrix::t(X)) + rank_res <- rank_matrix(Matrix::t(X)) } else { rank_res <- rank_matrix(X) } - - ustat <- compute_ustat(rank_res$X_ranked, y, n1n2, group.size) + + ustat <- compute_ustat(rank_res$X_ranked, y, n1n2, group.size) auc <- t(ustat / n1n2) - pvals <- compute_pval(ustat, rank_res$ties, ncol(X), n1n2) + pvals <- compute_pval(ustat, rank_res$ties, ncol(X), n1n2) fdr <- apply(pvals, 2, function(x) p.adjust(x, 'BH')) - + ### Auxiliary Statistics (AvgExpr, PctIn, LFC, etc) group_sums <- sumGroups(X, y, 1) group_nnz <- nnzeroGroups(X, y, 1) group_pct <- sweep(group_nnz, 1, as.numeric(table(y)), "/") %>% t() - group_pct_out <- -group_nnz %>% - sweep(2, colSums(group_nnz) , "+") %>% + group_pct_out <- -group_nnz %>% + sweep(2, colSums(group_nnz) , "+") %>% sweep(1, as.numeric(length(y) - table(y)), "/") %>% t() group_means <- sweep(group_sums, 1, as.numeric(table(y)), "/") %>% t() cs <- colSums(group_sums) @@ -399,13 +392,13 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { lfc <- Reduce(cbind, lapply(seq_len(length(levels(y))), function(g) { group_means[, g] - ((cs - group_sums[g, ]) / (length(y) - gs[g])) })) - - res_list <- list(auc = auc, + + res_list <- list(auc = auc, pval = pvals, - padj = fdr, - pct_in = 100 * group_pct, + padj = fdr, + pct_in = 100 * group_pct, pct_out = 100 * group_pct_out, - avgExpr = group_means, + avgExpr = group_means, statistic = t(ustat), logFC = lfc) return(tidy_results(res_list, row.names(X), levels(y))) @@ -413,20 +406,20 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { tidy_results <- function(wide_res, features, groups) { - res <- Reduce(cbind, lapply(wide_res, as.numeric)) %>% data.frame() + res <- Reduce(cbind, lapply(wide_res, as.numeric)) %>% data.frame() colnames(res) <- names(wide_res) res$feature <- rep(features, times = length(groups)) res$group <- rep(groups, each = length(features)) res %>% dplyr::select( - .data$feature, - .data$group, - .data$avgExpr, - .data$logFC, - .data$statistic, - .data$auc, - .data$pval, - .data$padj, - .data$pct_in, + .data$feature, + .data$group, + .data$avgExpr, + .data$logFC, + .data$statistic, + .data$auc, + .data$pval, + .data$padj, + .data$pct_in, .data$pct_out ) } @@ -434,12 +427,12 @@ tidy_results <- function(wide_res, features, groups) { compute_ustat <- function(Xr, cols, n1n2, group.size) { grs <- sumGroups(Xr, cols) - + if (is(Xr, 'dgCMatrix')) { gnz <- (group.size - nnzeroGroups(Xr, cols)) zero.ranks <- (nrow(Xr) - diff(Xr@p) + 1) / 2 ustat <- t((t(gnz) * zero.ranks)) + grs - group.size * - (group.size + 1 ) / 2 + (group.size + 1 ) / 2 } else { ustat <- grs - group.size * (group.size + 1 ) / 2 } @@ -457,71 +450,66 @@ compute_pval <- function(ustat, ties, N, n1n2) { }) %>% unlist usigma <- sqrt(matrix(n1n2, ncol = 1) %*% matrix(rhs, nrow = 1)) z <- t(z / usigma) - + pvals <- matrix(2 * pnorm(-abs(as.numeric(z))), ncol = ncol(z)) return(pvals) } #' rank_matrix -#' +#' #' Utility function to rank columns of matrix -#' -#' @param X feature by observation matrix. -#' +#' +#' @param X feature by observation matrix. +#' #' @return List with 2 items - - +#' @noRd rank_matrix <- function(X) { UseMethod('rank_matrix') } -##' @rdname rank_matrix rank_matrix.dgCMatrix <- function(X) { Xr <- Matrix(X, sparse = TRUE) ties <- cpp_rank_matrix_dgc(Xr@x, Xr@p, nrow(Xr), ncol(Xr)) return(list(X_ranked = Xr, ties = ties)) } -##' @rdname rank_matrix rank_matrix.matrix <- function(X) { cpp_rank_matrix_dense(X) } #' sumGroups -#' +#' #' Utility function to sum over group labels -#' +#' #' @param X matrix #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) -#' +#' #' @return Matrix of groups by features - +#' @noRd sumGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') } else if (MARGIN == 1 & ncol(X) != length(y)) { - stop('wrong dims') + stop('wrong dims') } UseMethod('sumGroups') } -##' @rdname sumGroups sumGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_sumGroups_dgc_T(X@x, X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, - length(unique(y))) + length(unique(y))) } else { cpp_sumGroups_dgc(X@x, X@p, X@i, ncol(X), as.integer(y) - 1, length(unique(y))) } } -##' @rdname sumGroups sumGroups.matrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { - cpp_sumGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) + cpp_sumGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) } else { cpp_sumGroups_dense(X, as.integer(y) - 1, length(unique(y))) } @@ -530,39 +518,37 @@ sumGroups.matrix <- function(X, y, MARGIN=2) { #' nnzeroGroups -#' +#' #' Utility function to compute number of zeros-per-feature within group -#' +#' #' @param X matrix #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) -#' +#' #' @return Matrix of groups by features - +#' @noRd nnzeroGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') } else if (MARGIN == 1 & ncol(X) != length(y)) { - stop('wrong dims') + stop('wrong dims') } UseMethod('nnzeroGroups') } -##' @rdname nnzeroGroups nnzeroGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_nnzeroGroups_dgc_T(X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, - length(unique(y))) + length(unique(y))) } else { cpp_nnzeroGroups_dgc(X@p, X@i, ncol(X), as.integer(y) - 1, length(unique(y))) } } -##' @rdname nnzeroGroups nnzeroGroups.matrix <- function(X, y, MARGIN=2) { - if (MARGIN == 1) { - cpp_nnzeroGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) + if (MARGIN == 1) { + cpp_nnzeroGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) } else { cpp_nnzeroGroups_dense(X, as.integer(y) - 1, length(unique(y))) } @@ -651,7 +637,6 @@ nmf_hals <- function(A, k, max_iters = 500, thresh = 1e-4, reps = 20, W0 = NULL, # commit d2cf403 on Feb 8, 2019 # #' @importFrom utils file_test -# fftRtsne <- function(X, dims = 2, perplexity = 30, diff --git a/man/calcARI.Rd b/man/calcARI.Rd index 222711f6..eb1bc6d3 100644 --- a/man/calcARI.Rd +++ b/man/calcARI.Rd @@ -22,17 +22,12 @@ The Rand index ranges from 0 to 1, with 0 indicating no agreement between cluste indicating perfect agreement. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -names(cluster2) <- colnames(ligerex@raw.data[[2]]) -# get ARI for first clustering -ari1 <- calcARI(ligerex, cluster1) -# get ARI for second clustering -ari2 <- calcARI(ligerex, cluster2) -} +agreement <- calcARI(ligerex, ligerex@clusters) } diff --git a/man/calcAgreement.Rd b/man/calcAgreement.Rd index 1ce3ad5a..22d338ac 100644 --- a/man/calcAgreement.Rd +++ b/man/calcAgreement.Rd @@ -49,14 +49,12 @@ expected to be most similar to iNMF. Although agreement can theoretically approa it is usually no higher than 0.2-0.3 (particularly for non-deterministic approaches like NMF). } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -# generate H.norm by quantile normalizig factor loadings +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -agreement <- calcAgreement(ligerex, dr.method = "NMF") -# ligerex (liger object based on datasets in HDF5 format), factorization complete -ligerex <- quantile_norm(ligerex) -ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) -agreement <- calcAgreement(ligerex, dr.method = "NMF") -} +agreement <- calcAgreement(ligerex) } diff --git a/man/calcAlignment.Rd b/man/calcAlignment.Rd index 4b7bc143..bba763a7 100644 --- a/man/calcAlignment.Rd +++ b/man/calcAlignment.Rd @@ -48,9 +48,12 @@ value for perfectly mixed datasets, and scale the value from 0 to 1. Note that i alignment can be greater than 1 occasionally. } \examples{ -\dontrun{ -# ligerex (liger object ), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -alignment <- calcAlignment(ligerex) -} +agreement <- calcAlignment(ligerex) } diff --git a/man/calcAlignmentPerCluster.Rd b/man/calcAlignmentPerCluster.Rd index 22890873..430b931f 100644 --- a/man/calcAlignmentPerCluster.Rd +++ b/man/calcAlignmentPerCluster.Rd @@ -7,7 +7,7 @@ calcAlignmentPerCluster(object, rand.seed = 1, k = NULL, by.dataset = FALSE) } \arguments{ -\item{object}{\code{liger} object. Should call quantileAlignSNF before calling.} +\item{object}{\code{liger} object. Should call quantile_norm before calling.} \item{rand.seed}{Random seed for reproducibility (default 1).} @@ -23,10 +23,12 @@ Vector of alignment statistics (with names of clusters). Returns alignment for each cluster in analysiss (see documentation for calcAlignment). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# get alignment for each cluster -alignment_per_cluster <- calcAlignmentPerCluster(ligerex) -} +agreement <- calcAlignmentPerCluster(ligerex) } diff --git a/man/calcDatasetSpecificity.Rd b/man/calcDatasetSpecificity.Rd index cc7f39a4..3ff1f6eb 100644 --- a/man/calcDatasetSpecificity.Rd +++ b/man/calcDatasetSpecificity.Rd @@ -32,10 +32,11 @@ calculate the norm of the sum of each factor's shared loadings (W) and dataset-s description. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings -ligerex <- quantile_norm(ligerex) -dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +calcDatasetSpecificity(ligerex) } diff --git a/man/calcNormLoadings.Rd b/man/calcNormLoadings.Rd index fcd39da9..7979a887 100644 --- a/man/calcNormLoadings.Rd +++ b/man/calcNormLoadings.Rd @@ -15,3 +15,13 @@ A dataframe, such that each column represents the contribution of a specific mat \description{ Calculates the contribution of each factor of W,V, and U to the reconstruction. } +\examples{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +calcNormLoadings(ligerex) +} diff --git a/man/calcPurity.Rd b/man/calcPurity.Rd index 4a466d5e..8d9d9ddb 100644 --- a/man/calcPurity.Rd +++ b/man/calcPurity.Rd @@ -23,17 +23,12 @@ subgroups or clusters than the true clusters (or classes). Purity also ranges fr with a score of 1 representing a pure, or accurate, clustering. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) -names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) -names(cluster2) <- colnames(ligerex@raw.data[[2]]) -# get ARI for first clustering -ari1 <- calcPurity(ligerex, cluster1) -# get ARI for second clustering -ari2 <- calcPurity(ligerex, cluster2) -} +agreement <- calcARI(ligerex, ligerex@clusters) } diff --git a/man/convertOldLiger.Rd b/man/convertOldLiger.Rd index 88aa09f4..2de152b6 100644 --- a/man/convertOldLiger.Rd +++ b/man/convertOldLiger.Rd @@ -24,8 +24,7 @@ class NULL. } \examples{ \dontrun{ -# analogy (old Analogizer object) -# convert to latest class definition +# Not able to generate old object from current version, thus not run ligerex <- convertOldLiger(analogy) } } diff --git a/man/createLiger.Rd b/man/createLiger.Rd index c1719a28..e0442c8e 100644 --- a/man/createLiger.Rd +++ b/man/createLiger.Rd @@ -51,8 +51,5 @@ By default, it converts all passed data into sparse matrices (dgCMatrix) to redu It initializes cell.data with nUMI and nGene calculated for every cell. } \examples{ -# Demonstration using matrices with randomly generated numbers -Y <- matrix(runif(5000,0,2), 10,500) -Z <- matrix(runif(5000,0,2), 10,500) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) } diff --git a/man/getFactorMarkers.Rd b/man/getFactorMarkers.Rd index 84a47632..317b6a9d 100644 --- a/man/getFactorMarkers.Rd +++ b/man/getFactorMarkers.Rd @@ -53,10 +53,11 @@ factorization, before selecting those which load most significantly on each fact or dataset-specific way). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input -markers <- getFactorMarkers(ligerex, num.genes = 10) -# look at shared markers -head(markers[[2]]) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +fm <- getFactorMarkers(ligerex, dataset1 = "stim", dataset2 = "ctrl") } diff --git a/man/getGeneValues.Rd b/man/getGeneValues.Rd index cc72db70..941850f1 100644 --- a/man/getGeneValues.Rd +++ b/man/getGeneValues.Rd @@ -19,10 +19,10 @@ getGeneValues( \item{gene}{Gene for which to return values (if gene is not found in appropriate dimnames will return vector of NA).} -\item{use.cols}{Whether to query columns for desired gene (set to TRUE if matrices are cell x +\item{use.cols}{Whether to query columns for desired gene (set to TRUE if matrices are cell x gene) (default FALSE).} -\item{methylation.indices}{Indices of datasets with methylation data (never log2scaled) +\item{methylation.indices}{Indices of datasets with methylation data (never log2scaled) (default NULL).} \item{log2scale}{Whether to log2+1 scale (with multiplicative factor) values (default FALSE).} @@ -33,13 +33,9 @@ gene) (default FALSE).} Plots to console (1-2 pages per factor) } \description{ -Returns single vector of gene values across all datasets in list provided. Data can be in raw, +Returns single vector of gene values across all datasets in list provided. Data can be in raw, normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. } \examples{ -\dontrun{ -# liger object with factorization complete -# ligerex -gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') -} +NKG7 <- getGeneValues(list(ctrl = ctrl, stim = stim), "NKG7") } diff --git a/man/getProportionMito.Rd b/man/getProportionMito.Rd index 67e7e9e5..93737a8d 100644 --- a/man/getProportionMito.Rd +++ b/man/getProportionMito.Rd @@ -4,12 +4,15 @@ \alias{getProportionMito} \title{Calculate proportion mitochondrial contribution} \usage{ -getProportionMito(object, use.norm = FALSE) +getProportionMito(object, use.norm = FALSE, mito.pattern = "^mt-") } \arguments{ \item{object}{\code{liger} object.} \item{use.norm}{Whether to use cell normalized data in calculating contribution (default FALSE).} + +\item{mito.pattern}{Regex pattern for identifying mitochondrial genes. Default "^mt-" typically goes for mouse. +May use "^MT-" for human.} } \value{ Named vector containing proportion of mitochondrial contribution for each cell. @@ -18,8 +21,7 @@ Named vector containing proportion of mitochondrial contribution for each cell. Calculates proportion of mitochondrial contribution based on raw or normalized data. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +# Expect a warning because the test data does not contain mito genes +ligerex@cell.data$mito <- getProportionMito(ligerex, mito.pattern = "^MT-") } diff --git a/man/imputeKNN.Rd b/man/imputeKNN.Rd index 861fb3fd..dcbfbf8d 100644 --- a/man/imputeKNN.Rd +++ b/man/imputeKNN.Rd @@ -40,6 +40,7 @@ Impute query features from a reference dataset using KNN. } \examples{ \dontrun{ +# Only runable for ATAC dataset. See tutorial on GitHub. # ligerex (liger object), factorization complete # impute every dataset other than the reference dataset ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd index 4129fa3b..f46a4f2f 100644 --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -37,8 +37,14 @@ Stores original dataset identity by default in new object metadata if dataset na in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets ONLY), factorization complete input -s.object <- ligerToSeurat(ligerex) +if (requireNamespace("Seurat")) { + ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) + ligerex <- normalize(ligerex) + ligerex <- selectGenes(ligerex) + ligerex <- scaleNotCenter(ligerex) + ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) + ligerex <- quantile_norm(ligerex) + ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) + srt <- ligerToSeurat(ligerex) } } diff --git a/man/linkGenesAndPeaks.Rd b/man/linkGenesAndPeaks.Rd index 1850e263..16b16612 100644 --- a/man/linkGenesAndPeaks.Rd +++ b/man/linkGenesAndPeaks.Rd @@ -42,6 +42,7 @@ Evaluate the relationships between pairs of genes and peaks based on specified d } \examples{ \dontrun{ +# Only runable for ATAC datasets, see tutorial on GitHub # some gene counts matrix: gmat.small # some peak counts matrix: pmat.small regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", diff --git a/man/louvainCluster.Rd b/man/louvainCluster.Rd index ab533b34..ef9936a1 100644 --- a/man/louvainCluster.Rd +++ b/man/louvainCluster.Rd @@ -52,9 +52,11 @@ for community detection, which is widely used in single-cell analysis and excels small clusters into broad cell classes. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -ligerex <- louvainCluster(ligerex, resulotion = 0.3) -} - +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex, resolution = 0.3) } diff --git a/man/makeInteractTrack.Rd b/man/makeInteractTrack.Rd index f94b6ddd..552f8456 100644 --- a/man/makeInteractTrack.Rd +++ b/man/makeInteractTrack.Rd @@ -25,6 +25,7 @@ Export the predicted gene-pair interactions calculated by upstream function 'lin } \examples{ \dontrun{ +# Only runable for ATAC datasets, see tutorial on GitHub # some gene-peak correlation matrix: regent makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') } diff --git a/man/makeRiverplot.Rd b/man/makeRiverplot.Rd index 556e8827..6eba96d9 100644 --- a/man/makeRiverplot.Rd +++ b/man/makeRiverplot.Rd @@ -66,6 +66,7 @@ can also be passed in. Uses the riverplot package to construct riverplot object } \examples{ \dontrun{ +# Riverplot currently archived, cannot run this example # ligerex (liger object), factorization complete input # toy clusters cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) diff --git a/man/nnzeroGroups.Rd b/man/nnzeroGroups.Rd deleted file mode 100644 index 1844d764..00000000 --- a/man/nnzeroGroups.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{nnzeroGroups} -\alias{nnzeroGroups} -\alias{nnzeroGroups.dgCMatrix} -\alias{nnzeroGroups.matrix} -\title{nnzeroGroups} -\usage{ -nnzeroGroups(X, y, MARGIN = 2) - -\method{nnzeroGroups}{dgCMatrix}(X, y, MARGIN = 2) - -\method{nnzeroGroups}{matrix}(X, y, MARGIN = 2) -} -\arguments{ -\item{X}{matrix} - -\item{y}{group labels} - -\item{MARGIN}{whether observations are rows (=2) or columns (=1)} -} -\value{ -Matrix of groups by features -} -\description{ -Utility function to compute number of zeros-per-feature within group -} diff --git a/man/nonneg.Rd b/man/nonneg.Rd deleted file mode 100644 index 9b82c0b2..00000000 --- a/man/nonneg.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rliger.R -\name{nonneg} -\alias{nonneg} -\title{Perform thresholding on dense matrix} -\usage{ -nonneg(x, eps = 1e-16) -} -\arguments{ -\item{x}{Dense matrix.} - -\item{eps}{Threshold. Should be a small positive value. (default 1e-16)} -} -\value{ -Dense matrix with smallest values equal to eps. -} -\description{ -Perform thresholding on the input dense matrix. Remove any values samller than eps by eps. -Helper function for online_iNMF -} diff --git a/man/normalize.Rd b/man/normalize.Rd index daa761c7..8cc22440 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -32,9 +32,6 @@ dataset) (default TRUE).} This function normalizes data to account for total gene expression across a cell. } \examples{ -# Demonstration using matrices with randomly generated numbers -Y <- matrix(runif(5000,0,2), 10,500) -Z <- matrix(runif(5000,0,2), 10,500) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) } diff --git a/man/online_iNMF.Rd b/man/online_iNMF.Rd index 73b5ef1b..cb974a0a 100644 --- a/man/online_iNMF.Rd +++ b/man/online_iNMF.Rd @@ -79,10 +79,13 @@ W is identical among all datasets, as it represents the shared components of the across datasets. The V matrices represent the dataset-specific components of the metagenes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# Get factorization using 20 factors and mini-batch of 5000 cells -# (default setting, can be adjusted for ideal results) -ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (length(ligerex@h5file.info) > 0) { + # This function only works for HDF5 based liger object + ligerex <- normalize(ligerex) + ligerex <- selectGenes(ligerex) + ligerex <- scaleNotCenter(ligerex) + # `miniBatch_size` has to be no larger than the number of cells in the smallest dataset + ligerex <- online_iNMF(ligerex, miniBatch_size = 100) } } diff --git a/man/optimizeALS.Rd b/man/optimizeALS.Rd index 89b826f8..d027050f 100644 --- a/man/optimizeALS.Rd +++ b/man/optimizeALS.Rd @@ -94,10 +94,10 @@ W is held consistent among all datasets, as it represents the shared components across datasets. The V matrices represent the dataset-specific components of the metagenes. } \examples{ -\dontrun{ -# Requires preprocessed liger object (only for objected not based on HDF5 files) -# Get factorization using 20 factors and mini-batch of 5000 cells -# (default setting, can be adjusted for ideal results) -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) } diff --git a/man/optimizeNewData.Rd b/man/optimizeNewData.Rd index e8877d1b..c25aa412 100644 --- a/man/optimizeNewData.Rd +++ b/man/optimizeNewData.Rd @@ -45,16 +45,28 @@ Uses an efficient strategy for updating that takes advantage of the information factorization. Assumes that selected genes (var.genes) are represented in the new datasets. } \examples{ -\dontrun{ -# Given preprocessed liger object: ligerex (contains two datasets Y and Z) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -# acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets -new_data <- list(Y_set = Y_new, Z_set = Z_new) -ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example test time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +# Suppose we have new data, namingly Y_new and Z_new from the same cell type. +# Add it to existing datasets. +new_data <- list(Y_set = ctrl, Z_set = stim) +# 2 iters do not lead to converge, it's for minimal test time +ligerex2 <- optimizeNewData(ligerex, new.data = new_data, + which.datasets = list('ctrl', 'stim'), + max.iters = 1) # acquire new data from different cell type (X), we'll just add another dataset -# it's probably most similar to y_set -ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), - add.to.existing = FALSE) +# it's probably most similar to ctrl +X <- ctrl +# 2 iters do not lead to converge, it's for minimal test time +ligerex3 <- optimizeNewData(ligerex, new.data = list(x_set = X), + which.datasets = list('ctrl'), + add.to.existing = FALSE, + max.iters = 1) } } diff --git a/man/optimizeNewK.Rd b/man/optimizeNewK.Rd index 3279c5e7..047727a3 100644 --- a/man/optimizeNewK.Rd +++ b/man/optimizeNewK.Rd @@ -40,8 +40,14 @@ existing factorization. It is most recommended for values of k smaller than curr where it is more likely to speed up the factorization. } \examples{ -\dontrun{ -# decide to run with k = 15 instead (keeping old lambda the same) -ligerex <- optimizeNewK(ligerex, k.new = 15) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +k <- 5 +# Minimum specification for fast example pass +ligerex <- optimizeALS(ligerex, k = k, max.iters = 1) +if (k != 5) { + ligerex <- optimizeNewK(ligerex, k.new = k, max.iters = 1) } } diff --git a/man/optimizeNewLambda.Rd b/man/optimizeNewLambda.Rd index dd6f1f87..c30b0198 100644 --- a/man/optimizeNewLambda.Rd +++ b/man/optimizeNewLambda.Rd @@ -36,8 +36,15 @@ factorization; uses previous k. Recommended mainly when re-optimizing for higher new lambda value is significantly different; otherwise may not return optimal results. } \examples{ -\dontrun{ +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example run time, not converging. +ligerex <- optimizeALS(ligerex, k = 5, lambda = 5, max.iters = 1) # decide to run with lambda = 15 instead (keeping k the same) -ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) +ligerex <- optimizeNewLambda(ligerex, new.lambda = 15, max.iters = 1) } } diff --git a/man/optimizeSubset.Rd b/man/optimizeSubset.Rd index 7cc719ec..58d170c4 100644 --- a/man/optimizeSubset.Rd +++ b/man/optimizeSubset.Rd @@ -42,9 +42,20 @@ factorization. Can use either cell names or cluster names to subset. For more ba functionality (without automatic optimization), see subsetLiger. } \examples{ -\dontrun{ -# now want to look at only subset of data -# Requires a vector of cell names from data 1 and a vector of cell names from data 2 -ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +\donttest{ +# Assume we are performing the factorization +# Specification for minimal example run time, not converging. +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +# Preparing subset with random sampling. +# Subset can also be obtained with prior knowledge from metadata. +cell_names_1 <- sample(rownames(ligerex@H[[1]]), 20) +cell_names_2 <- sample(rownames(ligerex@H[[2]]), 20) + +ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2), + max.iters = 1) } } diff --git a/man/plotByDatasetAndCluster.Rd b/man/plotByDatasetAndCluster.Rd index 0435b638..1e815350 100644 --- a/man/plotByDatasetAndCluster.Rd +++ b/man/plotByDatasetAndCluster.Rd @@ -68,13 +68,13 @@ single color for second plot. It is also possible to pass in another clustering names match those of cells). } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# get tsne.coords for normalized data -ligerex <- runTSNE(ligerex) -# plot to console -plotByDatasetAndCluster(ligerex) -# return list of plots -plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- louvainCluster(ligerex) +plotByDatasetAndCluster(ligerex, pt.size = 1) } diff --git a/man/plotClusterFactors.Rd b/man/plotClusterFactors.Rd index 6d813bea..2d01a550 100644 --- a/man/plotClusterFactors.Rd +++ b/man/plotClusterFactors.Rd @@ -42,9 +42,12 @@ loadings for a factor, black low. Optionally can also include dendrograms and so factors and clusters. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input -# plot expression for CD4 and return plots -loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex) +plotClusterFactors(ligerex) } diff --git a/man/plotClusterProportions.Rd b/man/plotClusterProportions.Rd index 5fbe344e..1de49c65 100644 --- a/man/plotClusterProportions.Rd +++ b/man/plotClusterProportions.Rd @@ -19,10 +19,12 @@ print plot to console (return.plot = FALSE); ggplot object (return.plot = TRUE) Generates plot of clusters sized by the proportion of total cells } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) ligerex <- quantile_norm(ligerex) -# plot cluster proportions +ligerex <- louvainCluster(ligerex) plotClusterProportions(ligerex) } -} diff --git a/man/plotFactors.Rd b/man/plotFactors.Rd index 621c8858..4df0f3bb 100644 --- a/man/plotFactors.Rd +++ b/man/plotFactors.Rd @@ -36,14 +36,13 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) ligerex <- quantile_norm(ligerex) -# get tsne.coords for normalized data -ligerex <- runTSNE(ligerex) -# factor plots into pdf file -# pdf("plot_factors.pdf") plotFactors(ligerex) -# dev.off() -} +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotFactors(ligerex, plot.tsne = TRUE) } diff --git a/man/plotFeature.Rd b/man/plotFeature.Rd index d9a1dbb0..9fcf5ed8 100644 --- a/man/plotFeature.Rd +++ b/man/plotFeature.Rd @@ -72,11 +72,12 @@ Feature can be categorical (factor) or continuous. Can also plot all datasets combined with by.dataset = FALSE. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# get tsne.coords for normalized data -ligerex <- runTSNE(ligerex) -# plot nUMI to console -plotFeature(ligerex, feature = 'nUMI') -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotFeature(ligerex, "nUMI", pt.size = 1) } diff --git a/man/plotGene.Rd b/man/plotGene.Rd index 2c03f1a9..3647138d 100644 --- a/man/plotGene.Rd +++ b/man/plotGene.Rd @@ -100,14 +100,12 @@ specified gene. Data can be scaled by dataset or selected feature column from ce all cells). Data plots can be split by feature. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -ligerex -ligerex <- runTSNE(ligerex) -# plot expression for CD4 and return plots -gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotGene(ligerex, "CD74", pt.size = 1) } diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd index c44795de..ecdf6db9 100644 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -91,16 +91,12 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex) -# pdf("gene_loadings.pdf") -plotGeneLoadings(ligerex, num.genes = 20) -# dev.off() -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -plotGeneLoadings(ligerex, num.genes = 20) -} - +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) } diff --git a/man/plotGeneViolin.Rd b/man/plotGeneViolin.Rd index 66e59c28..1ca65a6b 100644 --- a/man/plotGeneViolin.Rd +++ b/man/plotGeneViolin.Rd @@ -33,12 +33,13 @@ List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots t Generates violin plots of expression of specified gene for each dataset. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -# plot expression for CD4 and return plots -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex) +plotGeneViolin(ligerex, "CD74", by.dataset = FALSE) +plotGeneViolin(ligerex, "CD74") } diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd index 22fec2d5..bb5aafd8 100644 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -22,12 +22,12 @@ Uses plotGene to plot each gene (and dataset) on a separate page. It is recommen function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete input -ligerex <- runTSNE(ligerex) -# plot expression for CD4 and FCGR3A -# pdf("gene_plots.pdf") -plotGenes(ligerex, c("CD4", "FCGR3A")) -# dev.off() -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- quantile_norm(ligerex) +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) } diff --git a/man/plotWordClouds.Rd b/man/plotWordClouds.Rd index f946523a..739062a7 100644 --- a/man/plotWordClouds.Rd +++ b/man/plotWordClouds.Rd @@ -60,15 +60,12 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) ligerex <- quantile_norm(ligerex) -ligerex <- runTSNE(ligerex) -# pdf('word_clouds.pdf') -plotWordClouds(ligerex, num.genes = 20) -# dev.off() -# ligerex (liger object based on datasets in HDF5 format), factorization complete input -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -plotWordClouds(ligerex, num.genes = 20) -} +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +plotWordClouds(ligerex, do.spec.plot = FALSE) } diff --git a/man/quantile_norm.Rd b/man/quantile_norm.Rd index 5f86098f..ce060d53 100644 --- a/man/quantile_norm.Rd +++ b/man/quantile_norm.Rd @@ -86,13 +86,10 @@ stretching/compressing datasets' quantiles to better match those of the referenc aligned factor loadings are combined into a single matrix and returned as H.norm. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# do basic quantile alignment +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# higher resolution for more clusters (note that SNF is conserved) -ligerex <- quantile_norm(ligerex, resolution = 1.2) -# change knn_k for more fine-grained local clustering -ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) -} } diff --git a/man/rank_matrix.Rd b/man/rank_matrix.Rd deleted file mode 100644 index 77a78398..00000000 --- a/man/rank_matrix.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{rank_matrix} -\alias{rank_matrix} -\alias{rank_matrix.dgCMatrix} -\alias{rank_matrix.matrix} -\title{rank_matrix} -\usage{ -rank_matrix(X) - -\method{rank_matrix}{dgCMatrix}(X) - -\method{rank_matrix}{matrix}(X) -} -\arguments{ -\item{X}{feature by observation matrix.} -} -\value{ -List with 2 items -} -\description{ -Utility function to rank columns of matrix -} diff --git a/man/readSubset.Rd b/man/readSubset.Rd index d3d573ad..c0c7a4be 100644 --- a/man/readSubset.Rd +++ b/man/readSubset.Rd @@ -45,9 +45,9 @@ This function samples raw/normalized/scaled data from on-disk HDF5 files for plo This function assumes that the cell barcodes are unique across all datasets. } \examples{ -\dontrun{ -# Only for online liger object (based on HDF5 files) -# Example: sample a total amount of 5000 cells from norm.data for downstream analysis -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (length(ligerex@H) > 0) { + # Downsampling is calculated basing on factorization result + ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 100) } } diff --git a/man/removeMissingObs.Rd b/man/removeMissingObs.Rd index 1135dee1..3d4c3a0f 100644 --- a/man/removeMissingObs.Rd +++ b/man/removeMissingObs.Rd @@ -27,8 +27,10 @@ removeMissingObs( Removes cells/genes from chosen slot with no expression in any genes or cells respectively. } \examples{ -\dontrun{ -# liger object: ligerex -ligerex <- removeMissingObs(ligerex) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +if (any(rowSums(ctrl) == 0) || any(rowSums(stim) == 0)) { + # example datasets do not have missing data, thus put in a condition + # Though the function will return unchanged object if no missing found + ligerex <- removeMissingObs(ligerex) } } diff --git a/man/reorganizeLiger.Rd b/man/reorganizeLiger.Rd index 09174895..1a401137 100644 --- a/man/reorganizeLiger.Rd +++ b/man/reorganizeLiger.Rd @@ -32,10 +32,8 @@ Using the same data, rearrange functional datasets using another discrete featur This removes most computed data slots, though cell.data and current clustering can be retained. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory objects) organized by species -# with column designating sex in cell.data -# rearrange by sex -ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +# Create a random variable of two categories +ligerex@cell.data$foo <- factor(sample(c(1,2), 600, replace = TRUE)) +ligerexFoo <- reorganizeLiger(ligerex, "foo") } diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd index a32e9ea9..438a7c9a 100644 --- a/man/runGSEA.Rd +++ b/man/runGSEA.Rd @@ -33,10 +33,11 @@ A list of matrices with GSEA analysis for each factor Identify the biological pathways (gene sets from Reactome) that each metagene (factor) might belongs to. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -wilcox.results <- runGSEA(ligerex) -wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) -} - +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +result <- runGSEA(ligerex) } diff --git a/man/runTSNE.Rd b/man/runTSNE.Rd index b1590917..8f7150cb 100644 --- a/man/runTSNE.Rd +++ b/man/runTSNE.Rd @@ -54,13 +54,12 @@ FIt-SNE directory as the fitsne.path parameter, though this is only necessary fo to runTSNE. For more detailed FIt-SNE installation instructions, see the liger repo README. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -# get tsne.coords for normalized data ligerex <- runTSNE(ligerex) -# get tsne.coords for raw factor loadings -ligerex <- runTSNE(ligerex, use.raw = TRUE) -} } diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index 00087453..d2b41a21 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -53,13 +53,12 @@ Note that this method requires that the package uwot is installed. It does not d on reticulate or python umap-learn. } \examples{ -\dontrun{ -# ligerex (liger object), factorization complete -# generate H.norm by quantile normalizig factor loadings -ligerex <- quantileAlignSNF(ligerex) -# get tsne.coords for normalized data +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +# Specification for minimal example run time, not converging +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex) -# get tsne.coords for raw factor loadings -ligerex <- runUMAP(ligerex, use.raw = TRUE) -} } diff --git a/man/runWilcoxon.Rd b/man/runWilcoxon.Rd index 4dfbbdc2..6e03d2a1 100644 --- a/man/runWilcoxon.Rd +++ b/man/runWilcoxon.Rd @@ -4,7 +4,11 @@ \alias{runWilcoxon} \title{Perform Wilcoxon rank-sum test} \usage{ -runWilcoxon(object, data.use = "all", compare.method) +runWilcoxon( + object, + data.use = "all", + compare.method = c("clusters", "datasets") +) } \arguments{ \item{object}{\code{liger} object.} @@ -20,14 +24,19 @@ A 10-columns data.frame with test results. Perform Wilcoxon rank-sum tests on specified dataset using given method. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), factorization complete -wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") -wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) -# HDF5 input -# ligerex (liger object based on datasets in HDF5 format), factorization complete -# Need to sample cells before implementing Wilcoxon test -ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) -de_genes <- runWilcoxon(ligerex, compare.method = "clusters") +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +ligerex <- louvainCluster(ligerex, resolution = 0.3) +wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") +wilcox.results <- runWilcoxon(ligerex, compare.method = "datasets", data.use = c(1, 2)) +if (length(ligerex@h5file.info) > 0) { + # For HDF5 based object + # Need to sample cells and read into memory before running Wilcoxon test + ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) + wilcox.results <- runWilcoxon(ligerex, compare.method = "clusters") } } diff --git a/man/scaleNotCenter.Rd b/man/scaleNotCenter.Rd index 6082c595..20d57ea0 100644 --- a/man/scaleNotCenter.Rd +++ b/man/scaleNotCenter.Rd @@ -26,12 +26,8 @@ positive (NMF only accepts positive values). It also removes cells which do not expression across the genes selected, by default. } \examples{ -\dontrun{ -# Given datasets Y and Z -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) -# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) } -} diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index 1ca1a0df..e29d5f59 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -73,13 +73,7 @@ It also provides a log plot of gene variance vs gene expression (with a line ind expression across genes and cells). Selected genes are plotted in green. } \examples{ -\dontrun{ -# Given datasets Y and Z -ligerex <- createLiger(list(y_set = Y, z_set = Z)) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) -# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) -# select a smaller subset of genes -ligerex <- selectGenes(ligerex, var.thresh = 0.3) -} } diff --git a/man/seuratToLiger.Rd b/man/seuratToLiger.Rd index 39a2e071..5e85eb9c 100644 --- a/man/seuratToLiger.Rd +++ b/man/seuratToLiger.Rd @@ -76,17 +76,10 @@ identities from the original Seurat objects. Seurat V2 and V3 supported (though should share the same major version). } \examples{ -\dontrun{ -# Seurat objects for two pbmc datasets -tenx <- readRDS('tenx.RDS') -seqwell <- readRDS('seqwell.RDS') -# create liger object, using project names -ligerex <- seuratToLiger(list(tenx, seqwell)) -# create liger object, passing in names explicitly, using hvg.info genes -ligerex2 <- seuratToLiger(list(tenx, seqwell), names = c('tenx', 'seqwell'), num.hvg.info = 2000) -# Seurat object for joint analysis -pbmc <- readRDS('pbmc.RDS') -# create liger object, using 'protocol' for dataset names -ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) +if (requireNamespace("Seurat")) { + ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") + stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") + ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), + use.seurat.genes = FALSE) # because no var.gene now } } diff --git a/man/show-methods.Rd b/man/show-methods.Rd index 55da3d41..ff8cdfac 100644 --- a/man/show-methods.Rd +++ b/man/show-methods.Rd @@ -14,3 +14,7 @@ \description{ show method for liger } +\examples{ +ligerex <- createLiger(list(ctrl = ctrl)) +show(ligerex) +} diff --git a/man/subsetLiger.Rd b/man/subsetLiger.Rd index 98c98480..cacf47cb 100644 --- a/man/subsetLiger.Rd +++ b/man/subsetLiger.Rd @@ -31,10 +31,6 @@ raw.data, norm.data, scale.data, cell.data, H, W, V, H.norm, tsne.coords, and cl Note that it does NOT reoptimize the factorization. See optimizeSubset for this functionality. } \examples{ -\dontrun{ -# ligerex (liger object based on in-memory datasets), with clusters 0:10 -# factorization, alignment, and t-SNE calculation have been performed -# subset by clusters -ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +lig.small <- subsetLiger(ligerex, cells.use = c(colnames(ctrl)[1:100], colnames(stim)[1:100])) } diff --git a/man/suggestK.Rd b/man/suggestK.Rd index 09b2a1e9..ccbd0ef6 100644 --- a/man/suggestK.Rd +++ b/man/suggestK.Rd @@ -66,9 +66,9 @@ are not uniformly distributed when an appropriate number of factors is reached. Depending on number of cores used, this process can take 10-20 minutes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# examine plot for most appropriate k, use multiple cores for faster results -suggestK(ligerex, num.cores = 4) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +suggestK(ligerex, k.test = c(5,6), max.iters = 1) } diff --git a/man/suggestLambda.Rd b/man/suggestLambda.Rd index f055850e..a9769caa 100644 --- a/man/suggestLambda.Rd +++ b/man/suggestLambda.Rd @@ -73,9 +73,9 @@ likely also correspond to slower decrease in agreement. Depending on number of c this process can take 10-20 minutes. } \examples{ -\dontrun{ -# Requires preprocessed liger object -# examine plot for most appropriate lambda, use multiple cores for faster results -suggestLambda(ligerex, k = 20, num.cores = 4) -} +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) } diff --git a/man/sumGroups.Rd b/man/sumGroups.Rd deleted file mode 100644 index 5572a97b..00000000 --- a/man/sumGroups.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{sumGroups} -\alias{sumGroups} -\alias{sumGroups.dgCMatrix} -\alias{sumGroups.matrix} -\title{sumGroups} -\usage{ -sumGroups(X, y, MARGIN = 2) - -\method{sumGroups}{dgCMatrix}(X, y, MARGIN = 2) - -\method{sumGroups}{matrix}(X, y, MARGIN = 2) -} -\arguments{ -\item{X}{matrix} - -\item{y}{group labels} - -\item{MARGIN}{whether observations are rows (=2) or columns (=1)} -} -\value{ -Matrix of groups by features -} -\description{ -Utility function to sum over group labels -} From c5292ec8fde9366dcf41754071c1ec3316a05a48 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Tue, 7 Nov 2023 17:02:54 -0500 Subject: [PATCH 7/8] fix attempt --- R/rliger.R | 39 +++++++++++++++++++++++++++++---------- README.md | 4 ++-- man/ligerToSeurat.Rd | 23 +++++++++++++---------- man/plotFactors.Rd | 2 ++ man/plotGeneLoadings.Rd | 2 ++ man/plotGenes.Rd | 2 ++ man/plotWordClouds.Rd | 2 ++ man/runGSEA.Rd | 2 ++ man/runUMAP.Rd | 2 ++ man/suggestK.Rd | 2 ++ man/suggestLambda.Rd | 2 ++ 11 files changed, 60 insertions(+), 22 deletions(-) diff --git a/R/rliger.R b/R/rliger.R index f09e69e9..04096c1e 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -2795,11 +2795,13 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' @importFrom ggplot2 ggplot aes geom_point geom_line guides guide_legend labs theme theme_classic #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) #' suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) +#' } suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -2932,11 +2934,13 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) #' suggestK(ligerex, k.test = c(5,6), max.iters = 1) +#' } suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, max.iters = 100, num.cores = 1, rand.seed = 1, gen.new = FALSE, nrep = 1, plot.log2 = TRUE, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { @@ -3858,6 +3862,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -3865,6 +3870,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) #' # Specification for minimal example run time, not converging #' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' result <- runGSEA(ligerex) +#' } runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gene_sets = c()) { if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) { stop("Package \"org.Hs.eg.db\" needed for this function to work. Please install it by command:\n", @@ -4052,6 +4058,7 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -4060,6 +4067,7 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex) +#' } runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { set.seed(rand.seed) @@ -4848,6 +4856,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -4857,6 +4866,7 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit #' plotFactors(ligerex) #' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) #' plotFactors(ligerex, plot.tsne = TRUE) +#' } plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = FALSE, verbose = TRUE) { k <- ncol(object@H.norm) if (verbose) { @@ -4942,6 +4952,7 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -4950,6 +4961,7 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) #' plotWordClouds(ligerex, do.spec.plot = FALSE) +#' } plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = 30, min.size = 1, max.size = 4, factor.share.thresh = 10, log.fc.thresh = 1, pval.thresh = 0.05, do.spec.plot = TRUE, return.plots = FALSE, verbose = TRUE) { @@ -5096,6 +5108,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -5104,6 +5117,7 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) #' plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) +#' } plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes.show = 12, num.genes = 30, mark.top.genes = TRUE, factor.share.thresh = 10, log.fc.thresh = 1, umi.thresh = 30, frac.thresh = 0, @@ -5711,6 +5725,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by #' #' @export #' @examples +#' \donttest{ #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) @@ -5719,6 +5734,7 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by #' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) #' plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) +#' } plotGenes <- function(object, genes, ...) { for (i in 1:length(genes)) { print(genes[i]) @@ -6241,17 +6257,20 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' #' @export #' @examples -#' if (requireNamespace("Seurat")) { -#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) -#' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) -#' srt <- ligerToSeurat(ligerex) +#' \donttest{ +#' library(Seurat) +#' print(packageVersion("Seurat")) +#' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +#' ligerex <- normalize(ligerex) +#' ligerex <- selectGenes(ligerex) +#' ligerex <- scaleNotCenter(ligerex) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +#' ligerex <- quantile_norm(ligerex) +#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' print(packageVersion("Seurat")) +#' srt <- ligerToSeurat(ligerex, renormalize = FALSE) #' } -ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use.liger.genes = TRUE, +ligerToSeurat <- function(object, nms = names(object@raw.data), renormalize = TRUE, use.liger.genes = TRUE, by.dataset = FALSE) { if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" needed for this function to work. Please install it.", diff --git a/README.md b/README.md index c050eec8..306c9be0 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ LIGER (installed as `rliger` ) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. -Check out our [Cell paper](https://doi.org/10.1016/j.cell.2019.05.006) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our [study](https://portals.broadinstitute.org/single_cell/study/SCP466) on the -Single Cell Portal. +Check out our [Cell paper](https://doi.org/10.1016/j.cell.2019.05.006) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our study "SCP466" on the +[Single Cell Portal](https://singlecell.broadinstitute.org/single_cell). LIGER can be used to compare and contrast experimental datasets in a variety of contexts, for instance: diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd index f46a4f2f..2dce12fe 100644 --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -6,7 +6,7 @@ \usage{ ligerToSeurat( object, - nms = names(object@H), + nms = names(object@raw.data), renormalize = TRUE, use.liger.genes = TRUE, by.dataset = FALSE @@ -37,14 +37,17 @@ Stores original dataset identity by default in new object metadata if dataset na in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". } \examples{ -if (requireNamespace("Seurat")) { - ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) - ligerex <- normalize(ligerex) - ligerex <- selectGenes(ligerex) - ligerex <- scaleNotCenter(ligerex) - ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) - ligerex <- quantile_norm(ligerex) - ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) - srt <- ligerToSeurat(ligerex) +\donttest{ +library(Seurat) +print(packageVersion("Seurat")) +ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) +ligerex <- normalize(ligerex) +ligerex <- selectGenes(ligerex) +ligerex <- scaleNotCenter(ligerex) +ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) +ligerex <- quantile_norm(ligerex) +ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +print(packageVersion("Seurat")) +srt <- ligerToSeurat(ligerex, renormalize = FALSE) } } diff --git a/man/plotFactors.Rd b/man/plotFactors.Rd index 4df0f3bb..6bdf8f8c 100644 --- a/man/plotFactors.Rd +++ b/man/plotFactors.Rd @@ -36,6 +36,7 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -46,3 +47,4 @@ plotFactors(ligerex) ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) plotFactors(ligerex, plot.tsne = TRUE) } +} diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd index ecdf6db9..b3ba706f 100644 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -91,6 +91,7 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -100,3 +101,4 @@ ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) } +} diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd index bb5aafd8..f831ca55 100644 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -22,6 +22,7 @@ Uses plotGene to plot each gene (and dataset) on a separate page. It is recommen function into a PDF due to the large number of plots produced. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -31,3 +32,4 @@ ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) } +} diff --git a/man/plotWordClouds.Rd b/man/plotWordClouds.Rd index 739062a7..af0a9112 100644 --- a/man/plotWordClouds.Rd +++ b/man/plotWordClouds.Rd @@ -60,6 +60,7 @@ It is recommended to call this function into a PDF due to the large number of plots produced. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -69,3 +70,4 @@ ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) plotWordClouds(ligerex, do.spec.plot = FALSE) } +} diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd index 438a7c9a..b2c5f16c 100644 --- a/man/runGSEA.Rd +++ b/man/runGSEA.Rd @@ -33,6 +33,7 @@ A list of matrices with GSEA analysis for each factor Identify the biological pathways (gene sets from Reactome) that each metagene (factor) might belongs to. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -41,3 +42,4 @@ ligerex <- scaleNotCenter(ligerex) ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) result <- runGSEA(ligerex) } +} diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index d2b41a21..c90a424d 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -53,6 +53,7 @@ Note that this method requires that the package uwot is installed. It does not d on reticulate or python umap-learn. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) @@ -62,3 +63,4 @@ ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex) } +} diff --git a/man/suggestK.Rd b/man/suggestK.Rd index ccbd0ef6..8398f988 100644 --- a/man/suggestK.Rd +++ b/man/suggestK.Rd @@ -66,9 +66,11 @@ are not uniformly distributed when an appropriate number of factors is reached. Depending on number of cores used, this process can take 10-20 minutes. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) suggestK(ligerex, k.test = c(5,6), max.iters = 1) } +} diff --git a/man/suggestLambda.Rd b/man/suggestLambda.Rd index a9769caa..8620a779 100644 --- a/man/suggestLambda.Rd +++ b/man/suggestLambda.Rd @@ -73,9 +73,11 @@ likely also correspond to slower decrease in agreement. Depending on number of c this process can take 10-20 minutes. } \examples{ +\donttest{ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) suggestLambda(ligerex, k = 20, lambda.test = c(5, 10), max.iters = 1) } +} From b77fd015666bb9735066360dc6849d3670ac2b30 Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Wed, 8 Nov 2023 12:19:43 -0500 Subject: [PATCH 8/8] v1.0.1, fixes for R-devel with Matrix 1.6-2 --- DESCRIPTION | 3 +- NEWS.md | 1 + R/RcppExports.R | 226 ++++++++++++++++----------------- R/rliger.R | 177 ++++++++++---------------- man/ligerToSeurat.Rd | 26 ++-- man/plotByDatasetAndCluster.Rd | 4 +- man/plotFactors.Rd | 4 +- man/plotFeature.Rd | 4 +- man/plotGene.Rd | 4 +- man/plotGeneLoadings.Rd | 4 +- man/plotGenes.Rd | 4 +- man/plotWordClouds.Rd | 4 +- man/runUMAP.Rd | 4 +- man/seuratToLiger.Rd | 4 +- 14 files changed, 215 insertions(+), 254 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 09c3de78..7dc831eb 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rliger Version: 1.0.1 -Date: 2023-11-02 +Date: 2023-11-08 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. @@ -65,6 +65,7 @@ RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: Seurat, + SeuratObject, knitr, reticulate, rmarkdown, diff --git a/NEWS.md b/NEWS.md index 1de58af0..4a9d67ee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,5 +4,6 @@ - Fix efficiency issue when taking the log of norm.data (e.g. `runWilcoxon`) - Add runable examples to all exported functions when possible - Fix typo in online_iNMF matrix initialization +- Adapt to Seurat5 - Other minor fixes diff --git a/R/RcppExports.R b/R/RcppExports.R index b08396de..a3d24b85 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,113 +1,113 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) { - .Call('_rliger_RunModularityClusteringCpp', PACKAGE = 'rliger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) -} - -scaleNotCenterFast <- function(x) { - .Call('_rliger_scaleNotCenterFast', PACKAGE = 'rliger', x) -} - -rowMeansFast <- function(x) { - .Call('_rliger_rowMeansFast', PACKAGE = 'rliger', x) -} - -rowVarsFast <- function(x, means) { - .Call('_rliger_rowVarsFast', PACKAGE = 'rliger', x, means) -} - -sumSquaredDeviations <- function(x, means) { - .Call('_rliger_sumSquaredDeviations', PACKAGE = 'rliger', x, means) -} - -cpp_sumGroups_dgc <- function(x, p, i, ncol, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dgc', PACKAGE = 'rliger', x, p, i, ncol, groups, ngroups) -} - -cpp_sumGroups_dgc_T <- function(x, p, i, ncol, nrow, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dgc_T', PACKAGE = 'rliger', x, p, i, ncol, nrow, groups, ngroups) -} - -cpp_sumGroups_dense <- function(X, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_sumGroups_dense_T <- function(X, groups, ngroups) { - .Call('_rliger_cpp_sumGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dense <- function(X, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dense_T <- function(X, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) -} - -cpp_nnzeroGroups_dgc <- function(p, i, ncol, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dgc', PACKAGE = 'rliger', p, i, ncol, groups, ngroups) -} - -cpp_in_place_rank_mean <- function(v_temp, idx_begin, idx_end) { - .Call('_rliger_cpp_in_place_rank_mean', PACKAGE = 'rliger', v_temp, idx_begin, idx_end) -} - -cpp_rank_matrix_dgc <- function(x, p, nrow, ncol) { - .Call('_rliger_cpp_rank_matrix_dgc', PACKAGE = 'rliger', x, p, nrow, ncol) -} - -cpp_rank_matrix_dense <- function(X) { - .Call('_rliger_cpp_rank_matrix_dense', PACKAGE = 'rliger', X) -} - -cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { - .Call('_rliger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'rliger', p, i, ncol, nrow, groups, ngroups) -} - -#' Fast calculation of feature count matrix -#' -#' @param bedmat A feature count list generated by bedmap -#' @param barcodes A list of barcodes -#' -#' @return A feature count matrix with features as rows and barcodes as -#' columns -#' @export -#' @examples -#' \dontrun{ -#' gene.counts <- makeFeatureMatrix(genes.bc, barcodes) -#' promoter.counts <- makeFeatureMatrix(promoters.bc, barcodes) -#' samnple <- gene.counts + promoter.counts -#' } -makeFeatureMatrix <- function(bedmat, barcodes) { - .Call('_rliger_makeFeatureMatrix', PACKAGE = 'rliger', bedmat, barcodes) -} - -cluster_vote <- function(nn_ranked, clusts) { - .Call('_rliger_cluster_vote', PACKAGE = 'rliger', nn_ranked, clusts) -} - -scale_columns_fast <- function(mat, scale = TRUE, center = TRUE) { - .Call('_rliger_scale_columns_fast', PACKAGE = 'rliger', mat, scale, center) -} - -max_factor <- function(H, dims_use, center_cols = FALSE) { - .Call('_rliger_max_factor', PACKAGE = 'rliger', H, dims_use, center_cols) -} - -solveNNLS <- function(C, B) { - .Call('_rliger_solveNNLS', PACKAGE = 'rliger', C, B) -} - -ComputeSNN <- function(nn_ranked, prune) { - .Call('_rliger_ComputeSNN', PACKAGE = 'rliger', nn_ranked, prune) -} - -WriteEdgeFile <- function(snn, filename, display_progress) { - invisible(.Call('_rliger_WriteEdgeFile', PACKAGE = 'rliger', snn, filename, display_progress)) -} - -DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) { - .Call('_rliger_DirectSNNToFile', PACKAGE = 'rliger', nn_ranked, prune, display_progress, filename) -} - +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) { + .Call('_rliger_RunModularityClusteringCpp', PACKAGE = 'rliger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) +} + +scaleNotCenterFast <- function(x) { + .Call('_rliger_scaleNotCenterFast', PACKAGE = 'rliger', x) +} + +rowMeansFast <- function(x) { + .Call('_rliger_rowMeansFast', PACKAGE = 'rliger', x) +} + +rowVarsFast <- function(x, means) { + .Call('_rliger_rowVarsFast', PACKAGE = 'rliger', x, means) +} + +sumSquaredDeviations <- function(x, means) { + .Call('_rliger_sumSquaredDeviations', PACKAGE = 'rliger', x, means) +} + +cpp_sumGroups_dgc <- function(x, p, i, ncol, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dgc', PACKAGE = 'rliger', x, p, i, ncol, groups, ngroups) +} + +cpp_sumGroups_dgc_T <- function(x, p, i, ncol, nrow, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dgc_T', PACKAGE = 'rliger', x, p, i, ncol, nrow, groups, ngroups) +} + +cpp_sumGroups_dense <- function(X, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_sumGroups_dense_T <- function(X, groups, ngroups) { + .Call('_rliger_cpp_sumGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dense <- function(X, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dense_T <- function(X, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) +} + +cpp_nnzeroGroups_dgc <- function(p, i, ncol, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dgc', PACKAGE = 'rliger', p, i, ncol, groups, ngroups) +} + +cpp_in_place_rank_mean <- function(v_temp, idx_begin, idx_end) { + .Call('_rliger_cpp_in_place_rank_mean', PACKAGE = 'rliger', v_temp, idx_begin, idx_end) +} + +cpp_rank_matrix_dgc <- function(x, p, nrow, ncol) { + .Call('_rliger_cpp_rank_matrix_dgc', PACKAGE = 'rliger', x, p, nrow, ncol) +} + +cpp_rank_matrix_dense <- function(X) { + .Call('_rliger_cpp_rank_matrix_dense', PACKAGE = 'rliger', X) +} + +cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { + .Call('_rliger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'rliger', p, i, ncol, nrow, groups, ngroups) +} + +#' Fast calculation of feature count matrix +#' +#' @param bedmat A feature count list generated by bedmap +#' @param barcodes A list of barcodes +#' +#' @return A feature count matrix with features as rows and barcodes as +#' columns +#' @export +#' @examples +#' \dontrun{ +#' gene.counts <- makeFeatureMatrix(genes.bc, barcodes) +#' promoter.counts <- makeFeatureMatrix(promoters.bc, barcodes) +#' samnple <- gene.counts + promoter.counts +#' } +makeFeatureMatrix <- function(bedmat, barcodes) { + .Call('_rliger_makeFeatureMatrix', PACKAGE = 'rliger', bedmat, barcodes) +} + +cluster_vote <- function(nn_ranked, clusts) { + .Call('_rliger_cluster_vote', PACKAGE = 'rliger', nn_ranked, clusts) +} + +scale_columns_fast <- function(mat, scale = TRUE, center = TRUE) { + .Call('_rliger_scale_columns_fast', PACKAGE = 'rliger', mat, scale, center) +} + +max_factor <- function(H, dims_use, center_cols = FALSE) { + .Call('_rliger_max_factor', PACKAGE = 'rliger', H, dims_use, center_cols) +} + +solveNNLS <- function(C, B) { + .Call('_rliger_solveNNLS', PACKAGE = 'rliger', C, B) +} + +ComputeSNN <- function(nn_ranked, prune) { + .Call('_rliger_ComputeSNN', PACKAGE = 'rliger', nn_ranked, prune) +} + +WriteEdgeFile <- function(snn, filename, display_progress) { + invisible(.Call('_rliger_WriteEdgeFile', PACKAGE = 'rliger', snn, filename, display_progress)) +} + +DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) { + .Call('_rliger_DirectSNNToFile', PACKAGE = 'rliger', nn_ranked, prune, display_progress, filename) +} + diff --git a/R/rliger.R b/R/rliger.R index 04096c1e..22eb0996 100755 --- a/R/rliger.R +++ b/R/rliger.R @@ -1,6 +1,8 @@ #' @import Matrix #' @importFrom grDevices dev.off pdf #' @import hdf5r +#' @importFrom methods new +#' @importFrom utils packageVersion #' @importFrom Rcpp evalCpp NULL @@ -4066,7 +4068,9 @@ runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), u #' # Specification for minimal example run time, not converging #' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex) +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { +#' ligerex <- runUMAP(ligerex) +#' } #' } runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { @@ -4595,9 +4599,9 @@ getProportionMito <- function(object, use.norm = FALSE, mito.pattern = "^mt-") { #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' ligerex <- louvainCluster(ligerex) #' plotByDatasetAndCluster(ligerex, pt.size = 1) plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.size = 0.3, @@ -4737,9 +4741,9 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotFeature(ligerex, "nUMI", pt.size = 1) plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, title = NULL, pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, do.labels = FALSE, @@ -4861,10 +4865,10 @@ plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, tit #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) #' plotFactors(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotFactors(ligerex, plot.tsne = TRUE) #' } plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = FALSE, verbose = TRUE) { @@ -4957,9 +4961,9 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotWordClouds(ligerex, do.spec.plot = FALSE) #' } plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = 30, min.size = 1, @@ -5113,9 +5117,9 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) #' } plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes.show = 12, @@ -5473,9 +5477,9 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotGene(ligerex, "CD74", pt.size = 1) plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by = 'dataset', log2scale = NULL, methylation.indices = NULL, plot.by = 'dataset', @@ -5730,9 +5734,9 @@ plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by #' ligerex <- normalize(ligerex) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +#' ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) #' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +#' ligerex <- runTSNE(ligerex) #' plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) #' } plotGenes <- function(object, genes, ...) { @@ -6245,116 +6249,77 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' @param nms By default, labels cell names with dataset of origin (this is to account for cells in #' different datasets which may have same name). Other names can be passed here as vector, must #' have same length as the number of datasets. (default names(H)) -#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default TRUE). +#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default FALSE). #' @param use.liger.genes Whether to carry over variable genes (default TRUE). #' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE). -#' +#' @param assay Assay name to set in the Seurat object (default "RNA"). #' @return Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. -#' -#' @import Matrix -#' @importFrom methods new -#' @importFrom utils packageVersion -#' #' @export #' @examples -#' \donttest{ -#' library(Seurat) -#' print(packageVersion("Seurat")) #' ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) -#' ligerex <- quantile_norm(ligerex) -#' ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) -#' print(packageVersion("Seurat")) -#' srt <- ligerToSeurat(ligerex, renormalize = FALSE) +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { +#' # 1.6.2 is not compatible thus don't test +#' # but can use `setOldClass("mMatrix")` as a hack +#' srt <- ligerToSeurat(ligerex) #' } -ligerToSeurat <- function(object, nms = names(object@raw.data), renormalize = TRUE, use.liger.genes = TRUE, - by.dataset = FALSE) { +ligerToSeurat <- function(object, nms = NULL, renormalize = FALSE, use.liger.genes = TRUE, + by.dataset = FALSE, assay = "RNA") { if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" needed for this function to work. Please install it.", - call. = FALSE - ) + call. = FALSE) } - # get Seurat version - maj_version <- packageVersion('Seurat')$major - if (class(object@raw.data[[1]])[1] != 'dgCMatrix') { - # mat <- as(x, 'CsparseMatrix') - object@raw.data <- lapply(object@raw.data, function(x) { - as(x, 'CsparseMatrix') - }) + if (!inherits(object@raw.data[[1]], 'dgCMatrix')) { + object@raw.data <- lapply(object@raw.data, as, Class = "CsparseMatrix") } raw.data <- MergeSparseDataAll(object@raw.data, nms) - scale.data <- do.call(rbind, object@scale.data) - rownames(scale.data) <- colnames(raw.data) - if (maj_version < 3) { - var.genes <- object@var.genes - inmf.obj <- new( - Class = "dim.reduction", gene.loadings = t(object@W), - cell.embeddings = object@H.norm, key = "iNMF_" - ) - rownames(inmf.obj@gene.loadings) <- var.genes - tsne.obj <- new( - Class = "dim.reduction", cell.embeddings = object@tsne.coords, - key = "tSNE_" - ) + new.seurat <- Seurat::CreateSeuratObject(raw.data, assay = assay) + if (isTRUE(renormalize)) { + new.seurat <- Seurat::NormalizeData(new.seurat) } else { - var.genes <- object@var.genes - if (any(grepl('_', var.genes))) { - message("Warning: Seurat v3 genes cannot have underscores, replacing with dashes ('-')") - var.genes <- gsub("_", replacement = "-", var.genes) + if (length(object@norm.data) > 0) { + norm.data <- MergeSparseDataAll(object@norm.data, nms) + new.seurat <- SeuratObject::SetAssayData(new.seurat, layer = "data", slot = "data", new.data = norm.data) } + } + if (length(object@var.genes) > 0 && use.liger.genes) { + Seurat::VariableFeatures(new.seurat) <- object@var.genes + } + if (length(object@scale.data) > 0) { + scale.data <- t(Reduce(rbind, object@scale.data)) + colnames(scale.data) <- colnames(raw.data) + new.seurat <- SeuratObject::SetAssayData(object = new.seurat, layer = "scale.data", slot = "scale.data", new.data = scale.data) + } + if (all(dim(object@W) > 0) && all(dim(object@H.norm) > 0)) { inmf.loadings <- t(x = object@W) + dimnames(inmf.loadings) <- list(object@var.genes, + paste0("iNMF_", seq_len(ncol(inmf.loadings)))) inmf.embeddings <- object@H.norm - tsne.embeddings <- object@tsne.coords - rownames(x = inmf.loadings) <- var.genes - rownames(x = inmf.embeddings) <- - rownames(x = tsne.embeddings) <- - rownames(x = scale.data) + dimnames(inmf.embeddings) <- list(unlist(lapply(object@scale.data, rownames), use.names = FALSE), + paste0("iNMF_", seq_len(ncol(inmf.loadings)))) inmf.obj <- Seurat::CreateDimReducObject( embeddings = inmf.embeddings, - loadings = inmf.loadings, - key = "iNMF_", - global = TRUE + loadings = inmf.embeddings, + assay = assay, + key = "iNMF_" ) + new.seurat[["iNMF"]] <- inmf.obj + } + if (all(dim(object@tsne.coords) > 0)) { + tsne.embeddings <- object@tsne.coords + dimnames(tsne.embeddings) <- list(rownames(object@H.norm), + c("TSNE_1", "TSNE_2")) tsne.obj <- Seurat::CreateDimReducObject( embeddings = tsne.embeddings, - key = "tSNE_", - global = TRUE + assay = assay, + key = "TSNE_" ) + new.seurat[["TSNE"]] <- tsne.obj } - new.seurat <- Seurat::CreateSeuratObject(raw.data) - if (renormalize) { - new.seurat <- Seurat::NormalizeData(new.seurat) - } - if (by.dataset) { - ident.use <- as.character(unlist(lapply(1:length(object@raw.data), function(i) { - dataset.name <- names(object@raw.data)[i] - paste0(dataset.name, as.character(object@clusters[colnames(object@raw.data[[i]])])) - }))) - } else { - ident.use <- as.character(object@clusters) - } - - if (maj_version < 3) { - if (use.liger.genes) { - new.seurat@var.genes <- var.genes - } - new.seurat@scale.data <- t(scale.data) - new.seurat@dr$tsne <- tsne.obj - new.seurat@dr$inmf <- inmf.obj - new.seurat <- Seurat::SetIdent(new.seurat, ident.use = ident.use) - - } else { - if (use.liger.genes) { - Seurat::VariableFeatures(new.seurat) <- var.genes - } - Seurat::SetAssayData(new.seurat, slot = "scale.data", new.data = t(scale.data), assay = "RNA") - new.seurat[['tsne']] <- tsne.obj - new.seurat[['inmf']] <- inmf.obj - Seurat::Idents(new.seurat) <- ident.use - } + new.seurat$orig.ident <- object@cell.data$dataset + + idents <- object@clusters + if (length(idents) == 0 || isTRUE(by.dataset)) idents <- object@cell.data$dataset + Seurat::Idents(new.seurat) <- idents return(new.seurat) } @@ -6398,18 +6363,14 @@ ligerToSeurat <- function(object, nms = names(object@raw.data), renormalize = TR #' @param cca.to.H Carry over CCA (and aligned) loadings and insert them into H (and H.norm) slot in #' liger object (only meaningful for combined analysis Seurat object). Useful for plotting directly #' afterwards. (default FALSE) -#' #' @return \code{liger} object. -#' -#' @import Matrix -#' #' @export #' @examples -#' if (requireNamespace("Seurat")) { +#' if (packageVersion("Matrix") <= package_version("1.6.1.1")) { #' ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") #' stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") #' ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), -#' use.seurat.genes = FALSE) # because no var.gene now +#' use.seurat.genes = FALSE) #' } seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-projects", meta.var = NULL, assays.use = NULL, raw.assay = "RNA", remove.missing = TRUE, renormalize = TRUE, diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd index 2dce12fe..fae2ebc9 100644 --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -6,10 +6,11 @@ \usage{ ligerToSeurat( object, - nms = names(object@raw.data), - renormalize = TRUE, + nms = NULL, + renormalize = FALSE, use.liger.genes = TRUE, - by.dataset = FALSE + by.dataset = FALSE, + assay = "RNA" ) } \arguments{ @@ -19,11 +20,13 @@ ligerToSeurat( different datasets which may have same name). Other names can be passed here as vector, must have same length as the number of datasets. (default names(H))} -\item{renormalize}{Whether to log-normalize raw data using Seurat defaults (default TRUE).} +\item{renormalize}{Whether to log-normalize raw data using Seurat defaults (default FALSE).} \item{use.liger.genes}{Whether to carry over variable genes (default TRUE).} \item{by.dataset}{Include dataset of origin in cluster identity in Seurat object (default FALSE).} + +\item{assay}{Assay name to set in the Seurat object (default "RNA").} } \value{ Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. @@ -37,17 +40,10 @@ Stores original dataset identity by default in new object metadata if dataset na in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". } \examples{ -\donttest{ -library(Seurat) -print(packageVersion("Seurat")) ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) -ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) -print(packageVersion("Seurat")) -srt <- ligerToSeurat(ligerex, renormalize = FALSE) +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { + # 1.6.2 is not compatible thus don't test + # but can use `setOldClass("mMatrix")` as a hack + srt <- ligerToSeurat(ligerex) } } diff --git a/man/plotByDatasetAndCluster.Rd b/man/plotByDatasetAndCluster.Rd index 1e815350..07ef9347 100644 --- a/man/plotByDatasetAndCluster.Rd +++ b/man/plotByDatasetAndCluster.Rd @@ -72,9 +72,9 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) ligerex <- louvainCluster(ligerex) plotByDatasetAndCluster(ligerex, pt.size = 1) } diff --git a/man/plotFactors.Rd b/man/plotFactors.Rd index 6bdf8f8c..d62c8a50 100644 --- a/man/plotFactors.Rd +++ b/man/plotFactors.Rd @@ -41,10 +41,10 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) plotFactors(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotFactors(ligerex, plot.tsne = TRUE) } } diff --git a/man/plotFeature.Rd b/man/plotFeature.Rd index 9fcf5ed8..67529de0 100644 --- a/man/plotFeature.Rd +++ b/man/plotFeature.Rd @@ -76,8 +76,8 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotFeature(ligerex, "nUMI", pt.size = 1) } diff --git a/man/plotGene.Rd b/man/plotGene.Rd index 3647138d..92fc86bd 100644 --- a/man/plotGene.Rd +++ b/man/plotGene.Rd @@ -104,8 +104,8 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotGene(ligerex, "CD74", pt.size = 1) } diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd index b3ba706f..c456a35d 100644 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -96,9 +96,9 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotGeneLoadings(ligerex, "stim", "ctrl", do.spec.plot = FALSE) } } diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd index f831ca55..e4196139 100644 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -27,9 +27,9 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotGenes(ligerex, c("CD74", "NKG7"), pt.size = 1) } } diff --git a/man/plotWordClouds.Rd b/man/plotWordClouds.Rd index af0a9112..68992dd7 100644 --- a/man/plotWordClouds.Rd +++ b/man/plotWordClouds.Rd @@ -65,9 +65,9 @@ ligerex <- createLiger(list(ctrl = ctrl, stim = stim)) ligerex <- normalize(ligerex) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 5, max.iter = 2) +ligerex <- optimizeALS(ligerex, k = 5, max.iter = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex, distance = "cosine", min_dist = .3) +ligerex <- runTSNE(ligerex) plotWordClouds(ligerex, do.spec.plot = FALSE) } } diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index c90a424d..e2ddaf30 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -61,6 +61,8 @@ ligerex <- scaleNotCenter(ligerex) # Specification for minimal example run time, not converging ligerex <- optimizeALS(ligerex, k = 5, max.iters = 1) ligerex <- quantile_norm(ligerex) -ligerex <- runUMAP(ligerex) +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { + ligerex <- runUMAP(ligerex) +} } } diff --git a/man/seuratToLiger.Rd b/man/seuratToLiger.Rd index 5e85eb9c..250e6b17 100644 --- a/man/seuratToLiger.Rd +++ b/man/seuratToLiger.Rd @@ -76,10 +76,10 @@ identities from the original Seurat objects. Seurat V2 and V3 supported (though should share the same major version). } \examples{ -if (requireNamespace("Seurat")) { +if (packageVersion("Matrix") <= package_version("1.6.1.1")) { ctrl.srt <- Seurat::CreateSeuratObject(ctrl, project = "ctrl") stim.srt <- Seurat::CreateSeuratObject(stim, project = "stim") ligerex <- seuratToLiger(list(ctrl = ctrl.srt, stim = stim.srt), - use.seurat.genes = FALSE) # because no var.gene now + use.seurat.genes = FALSE) } }