From 83dd41f475d6b7f678d4d9baf21902a41c6aad31 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:43:43 -0400 Subject: [PATCH] WIP: use pizzarr and anndataR --- R/data_to_zarr.R | 53 +++--------------------------------------------- 1 file changed, 3 insertions(+), 50 deletions(-) diff --git a/R/data_to_zarr.R b/R/data_to_zarr.R index 8fa9a3c..dff87ac 100644 --- a/R/data_to_zarr.R +++ b/R/data_to_zarr.R @@ -11,56 +11,9 @@ #' obj <- get_seurat_obj() #' seurat_to_anndata_zarr(obj, out_path = "data/seurat.zarr", assay = "RNA") seurat_to_anndata_zarr <- function(seurat_obj, out_path, assay) { - if(!requireNamespace("SeuratDisk", quietly = TRUE)) { - stop("Install 'SeuratDisk' to enable conversion of Seurat objects to AnnData objects.") - } - - h5seurat_path <- paste0(out_path, ".h5Seurat") - h5ad_path <- paste0(out_path, ".h5ad") - - # Convert factor columns to string/numeric. - seurat_obj@meta.data <- varhandle::unfactor(seurat_obj@meta.data) - - SeuratDisk::SaveH5Seurat(seurat_obj, filename = h5seurat_path, overwrite = TRUE) - SeuratDisk::Convert(h5seurat_path, dest = "h5ad", overwrite = TRUE, assay = assay) - - # Use basilisk - proc <- basilisk::basiliskStart(py_env) - on.exit(basilisk::basiliskStop(proc)) - - success <- basilisk::basiliskRun(proc, function(h5ad_path, out_path) { - anndata <- reticulate::import("anndata") - zarr <- reticulate::import("zarr") - - adata <- anndata$read_h5ad(h5ad_path) - - cleanup_colnames <- function(df) { - # Reference: https://github.com/theislab/scvelo/issues/255#issuecomment-739995301 - new_colnames <- colnames(df) - new_colnames[new_colnames == "_index"] <- "features" - return(new_colnames) - } - - noop <- function(cond) { } - - tryCatch({ - colnames(adata$var) <- cleanup_colnames(adata$var) - }, error = noop) - - # Reconstruct, omitting raw and uns. - adata <- anndata$AnnData( - X = adata$X, - obs = as.data.frame(adata$obs), - var = as.data.frame(adata$var), - obsm = adata$obsm, - varm = adata$varm - ) - - adata$write_zarr(out_path) - - return(TRUE) - }, h5ad_path = h5ad_path, out_path = out_path) - return(success) + sce <- Seurat::as.SingleCellExperiment(seurat_obj) + store <- pizzarr::DirectoryStore$new(out_path) + anndataR::from_SingleCellExperiment(sce, "ZarrAnnData", store = store) } #' Save a SingleCellExperiment to an AnnData-Zarr store.