From 921126e292e4bc91adb97d8aa7f2fd5f5535bcf4 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Fri, 4 Oct 2024 10:47:13 +0200 Subject: [PATCH 01/57] create spatialdata visium --- .../make_mudataspatial_from_csv.py | 75 ++++++++++++------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/panpipes/python_scripts/make_mudataspatial_from_csv.py b/panpipes/python_scripts/make_mudataspatial_from_csv.py index 06453411..afad522c 100644 --- a/panpipes/python_scripts/make_mudataspatial_from_csv.py +++ b/panpipes/python_scripts/make_mudataspatial_from_csv.py @@ -8,6 +8,7 @@ import warnings from muon._atac.tools import add_peak_annotation, locate_fragments import squidpy as sq +import spatialdata_io as sd_io from mudata import MuData import os """ @@ -52,6 +53,15 @@ parser.add_argument('--spatial_counts', default=None, help='') +parser.add_argument('--scalefactors_file', + default=None, + help='') +parser.add_argument('--fullres_image_file', + default=None, + help='') +parser.add_argument('--tissue_positions_file', + default=None, + help='') parser.add_argument('--spatial_metadata', default=None, help='') @@ -64,21 +74,24 @@ L.info("Running with params: %s", args) # unimodal mu (check if all the modalities) -if isinstance(args.mode_dictionary, dict): - mode_dictionary = args.mode_dictionary -else: - mode_dictionary = read_yaml(args.mode_dictionary) +#if isinstance(args.mode_dictionary, dict): +# mode_dictionary = args.mode_dictionary +#else: +# mode_dictionary = read_yaml(args.mode_dictionary) #{'spatialT': True} -permf = [key for key, value in mode_dictionary.items() if value == True] +#permf = [key for key, value in mode_dictionary.items() if value == True] all_files = { - "spatial":[args.spatial_infile, #path, mandatory for squidpy + "spatial":[args.spatial_infile, #path args.spatial_filetype, #needed for the load_adata_in function to call one of vizgen,visium args.spatial_counts, #name of the counts file, mandatory for squidpy - args.spatial_metadata, #name of the metadata file, mandatory for squidpy - args.spatial_transformation]} + args.fullres_image_file, # visium + args.tissue_positions_file, #visium + args.scalefactors_file]} # visium +# args.spatial_metadata, #name of the metadata file, mandatory for squidpy +# args.spatial_transformation]} #subset to the modalities we want from permf (in this case only spatial) -all_files = {nm: x for (nm, x) in all_files.items() if nm in permf} +#all_files = {nm: x for (nm, x) in all_files.items() if nm in permf} #[check_filetype(x[0], x[1]) for x in all_files.values()] # read the spatial data with one of the functions inside @@ -125,16 +138,22 @@ def check_dir_transform(infile_path, transform_file): adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns = adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns.astype(str) elif args.spatial_filetype =="visium": L.info("Reading in Visium data with squidpy.read.visium() into AnnData from directory " + args.spatial_infile) - adata = sq.read.visium(path = args.spatial_infile, #path, mandatory for squidpy - counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy - library_id = str(args.sample_id) - ) #this also has kwargs for read_10x_h5 but keep simple + sdata = sd_io.visium(path=args.spatial_infile, + dataset_id=str(args.sample_id), + counts_file=args.spatial_counts, + fullres_image_file=args.fullres_image_file, + tissue_positions_file=args.tissue_positions_file, + 
scalefactors_file=args.scalefactors_file) + #adata = sq.read.visium(path = args.spatial_infile, #path, mandatory for squidpy + # counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy + # library_id = str(args.sample_id) + # ) #this also has kwargs for read_10x_h5 but keep simple -L.info("Resulting AnnData is:") -L.info(adata) -L.info("Creating MuData with .mod['spatial']") +L.info("Resulting SpatialData is:") +L.info(sdata) +#L.info("Creating MuData with .mod['spatial']") -mdata = MuData({"spatial": adata}) +#mdata = MuData({"spatial": adata}) #--------------- @@ -143,25 +162,25 @@ def check_dir_transform(infile_path, transform_file): L.info("Making var names unique") #make var names unique -for mm in mdata.mod.keys(): - mdata[mm].var_names_make_unique() +#for mm in mdata.mod.keys(): +sdata["table"].var_names_make_unique() L.info("Adding sample_id '%s'to MuData.obs and MuData.mod['spatial'].obs" % args.sample_id) -mdata.obs['sample_id'] = str(args.sample_id) +sdata["table"].obs['sample_id'] = str(args.sample_id) # copy the sample_id to each modality -for mm in mdata.mod.keys(): +#for mm in mdata.mod.keys(): # mdata[mm].obs['sample_id'] = mdata.obs['sample_id'] - mdata[mm].obs['sample_id'] = mdata.obs.loc[mdata[mm].obs_names,:]['sample_id'] +sdata["table"].obs['sample_id'] = sdata["table"].obs.loc[sdata["table"].obs_names,:]['sample_id'] -mdata.update() +#mdata.update() -L.info("Resulting MuData is:") -L.info(mdata) +L.info("Resulting SpatialData is:") +L.info(sdata) -L.info("Saving MuData to '%s'" % args.output_file) -L.debug(mdata) -mdata.write(args.output_file) +L.info("Saving SpatialData to '%s'" % args.output_file) +L.debug(sdata) +sdata.write(args.output_file) L.info("Done") From 43e0c1e23823fa336705459613821e505ed74c84 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Fri, 4 Oct 2024 10:53:25 +0200 Subject: [PATCH 02/57] create spatialdata vizgen --- .../python_scripts/make_mudataspatial_from_csv.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/panpipes/python_scripts/make_mudataspatial_from_csv.py b/panpipes/python_scripts/make_mudataspatial_from_csv.py index afad522c..95c06ff0 100644 --- a/panpipes/python_scripts/make_mudataspatial_from_csv.py +++ b/panpipes/python_scripts/make_mudataspatial_from_csv.py @@ -130,12 +130,13 @@ def check_dir_transform(infile_path, transform_file): if args.spatial_filetype=="vizgen": L.info("Reading in Vizgen data with squidpy.read.vizgen() into AnnData from directory " + args.spatial_infile) - adata = sq.read.vizgen(path = args.spatial_infile, #path, mandatory for squidpy - counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy - meta_file = args.spatial_metadata, #name of the metadata file, mandatory for squidpy - transformation_file=args.spatial_transformation, - library_id = str(args.sample_id)) #this also has kwargs for read_10x_h5 but keep simple - adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns = adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns.astype(str) + sdata = sd_io.merscope(path = args.spatial_infile) +# adata = sq.read.vizgen(path = args.spatial_infile, #path, mandatory for squidpy +# counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy +# meta_file = args.spatial_metadata, #name of the metadata file, mandatory for squidpy +# transformation_file=args.spatial_transformation, +# library_id = str(args.sample_id)) #this also has kwargs for 
read_10x_h5 but keep simple +# adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns = adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns.astype(str) elif args.spatial_filetype =="visium": L.info("Reading in Visium data with squidpy.read.visium() into AnnData from directory " + args.spatial_infile) sdata = sd_io.visium(path=args.spatial_infile, From c1e823a471e06375d24f9a364fb3c8768d6e576b Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 09:52:45 +0200 Subject: [PATCH 03/57] rename python file --- panpipes/panpipes/pipeline_qc_spatial.py | 2 +- ...e_mudataspatial_from_csv.py => make_spatialData_from_csv.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename panpipes/python_scripts/{make_mudataspatial_from_csv.py => make_spatialData_from_csv.py} (100%) diff --git a/panpipes/panpipes/pipeline_qc_spatial.py b/panpipes/panpipes/pipeline_qc_spatial.py index 538cf67c..f0ceac69 100644 --- a/panpipes/panpipes/pipeline_qc_spatial.py +++ b/panpipes/panpipes/pipeline_qc_spatial.py @@ -98,7 +98,7 @@ def load_mudatas(spatial_path, outfile, assays[outfile] = spatial_filetype cmd = """ - python %(py_path)s/make_mudataspatial_from_csv.py + python %(py_path)s/make_spatialData_from_csv.py --mode_dictionary "%(modality_dict)s" --sample_id %(sample_id)s --output_file %(outfile)s diff --git a/panpipes/python_scripts/make_mudataspatial_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py similarity index 100% rename from panpipes/python_scripts/make_mudataspatial_from_csv.py rename to panpipes/python_scripts/make_spatialData_from_csv.py From e3d6f2e5ff512d64e01005b999aabf880457e358 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 10:27:26 +0200 Subject: [PATCH 04/57] adjust spatial loading for spatialData --- panpipes/funcs/io.py | 53 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index 77d74d2b..96b4dca2 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -159,33 +159,52 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): if caf['spatial_filetype'][nn]=="vizgen": spatial_filetype = caf['spatial_filetype'][nn] #path, counts and metadata are mandatory - if pd.notna(caf["spatial_counts"][nn]): - spatial_counts= caf["spatial_counts"][nn] - else: - spatial_counts = None - if pd.notna(caf["spatial_metadata"][nn]): - spatial_metadata = caf["spatial_metadata"][nn] - else: - spatial_metadata = None + #if pd.notna(caf["spatial_counts"][nn]): + # spatial_counts= caf["spatial_counts"][nn] + #else: + # spatial_counts = None + #if pd.notna(caf["spatial_metadata"][nn]): + # spatial_metadata = caf["spatial_metadata"][nn] + #else: + # spatial_metadata = None #transformation is optional - if pd.notna(caf["spatial_transformation"][nn]): - spatial_transformation = caf["spatial_transformation"][nn] - else: - spatial_transformation = None + #if pd.notna(caf["spatial_transformation"][nn]): + # spatial_transformation = caf["spatial_transformation"][nn] + #else: + # spatial_transformation = None elif caf['spatial_filetype'][nn]=="visium": - spatial_metadata= None - spatial_transformation = None + #spatial_metadata= None + #spatial_transformation = None spatial_filetype = caf['spatial_filetype'][nn] + #counts file if pd.notna(caf["spatial_counts"][nn]): spatial_counts= caf["spatial_counts"][nn] else: spatial_counts = None + # fullres image + if 
pd.notna(caf["spatial_fullres_image_file"][nn]): + spatial_fullres_image_file= caf["spatial_fullres_image_file"][nn] + else: + spatial_fullres_image_file = None + # tissue position + if pd.notna(caf["spatial_tissue_positions_file"][nn]): + spatial_tissue_positions_file= caf["spatial_tissue_positions_file"][nn] + else: + spatial_tissue_positions_file = None + # scalefactor + if pd.notna(caf["spatial_scalefactors_file"][nn]): + spatial_scalefactors_file= caf["spatial_scalefactors_file"][nn] + else: + spatial_scalefactors_file = None else: spatial_path= None spatial_filetype = None spatial_counts = None - spatial_metadata = None - spatial_transformation = None + spatial_fullres_image_file = None + spatial_tissue_positions_file = None + spatial_scalefactors_file = None + #spatial_metadata = None + #spatial_transformation = None if 'barcode_mtd_path' in caf.columns: cell_mtd_path = caf['barcode_mtd_path'][nn] #not yielding this right now! @@ -199,7 +218,7 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): outfile = outfile + ".h5mu" sample_id = caf['sample_id'][nn] yield spatial_path, outfile, \ - sample_id, spatial_filetype, spatial_counts, spatial_metadata, spatial_transformation + sample_id, spatial_filetype, spatial_counts, spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file #spatial_metadata, spatial_transformation def read_anndata( From 07844b5d1b37f3d413e6200e7c109a1bf95a3602 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 10:28:03 +0200 Subject: [PATCH 05/57] adjust spatial loading for spatialData --- panpipes/panpipes/pipeline_qc_spatial.py | 35 +++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/panpipes/panpipes/pipeline_qc_spatial.py b/panpipes/panpipes/pipeline_qc_spatial.py index f0ceac69..c84ade7a 100644 --- a/panpipes/panpipes/pipeline_qc_spatial.py +++ b/panpipes/panpipes/pipeline_qc_spatial.py @@ -73,12 +73,9 @@ def gen_load_spatial_anndata_jobs(): @follows(mkdir("logs")) @follows(mkdir("tmp")) @files(gen_load_spatial_anndata_jobs) -def load_mudatas(spatial_path, outfile, - sample_id, - spatial_filetype, - spatial_counts, - spatial_metadata, - spatial_transformation): +def load_mudatas(spatial_path, outfile, + sample_id, spatial_filetype, spatial_counts, + spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file): path_dict = {'spatial':spatial_path} @@ -86,12 +83,17 @@ def load_mudatas(spatial_path, outfile, print('sample_id = %s' % str(sample_id)) print('outfile = %s' % str(outfile)) print('spatial_filetype = %s' % str(spatial_filetype)) - print('spatial_counts = %s' % str(spatial_counts)) - if spatial_filetype == "vizgen": - print('spatial_metadata = %s' % str(spatial_metadata)) - print('spatial_transformation = %s' % str(spatial_transformation)) - else: - print("visium") + #print('spatial_counts = %s' % str(spatial_counts)) + #if spatial_filetype == "vizgen": + # print('spatial_metadata = %s' % str(spatial_metadata)) + # print('spatial_transformation = %s' % str(spatial_transformation)) + #else: + # print("visium") + if spatial_filetype == "visium": + print('spatial_counts = %s' % str(spatial_counts)) + print('spatial_fullres_image_file= %s' % str(spatial_fullres_image_file)) + print('spatial_tissue_positions_file= %s' % str(spatial_tissue_positions_file)) + print('spatial_scalefactors_file= %s' % str(spatial_scalefactors_file)) modality_dict = {k:True if path_dict[k] is not None else False for k,v in {'spatial': True}.items() } 
print(modality_dict) @@ -104,12 +106,13 @@ def load_mudatas(spatial_path, outfile, --output_file %(outfile)s --spatial_filetype %(spatial_filetype)s --spatial_infile %(spatial_path)s - --spatial_counts %(spatial_counts)s """ - if spatial_filetype == "vizgen": + if spatial_filetype == "visium": cmd += """ - --spatial_metadata %(spatial_metadata)s - --spatial_transformation %(spatial_transformation)s + --spatial_counts %(spatial_counts)s + --scalefactors_file %(spatial_scalefactors_file)s + --fullres_image_file %(spatial_fullres_image_file)s + --tissue_positions_file %(spatial_tissue_positions_file)s """ cmd += " > logs/1_make_mudatas_%(sample_id)s.log" job_kwargs["job_threads"] = PARAMS['resources_threads_medium'] From 2c92e9a4eeb4e4f50632a278b1048531913e8e8a Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 10:28:22 +0200 Subject: [PATCH 06/57] remove squidpy merfish parameters --- panpipes/python_scripts/make_spatialData_from_csv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index 95c06ff0..81050809 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -62,12 +62,12 @@ parser.add_argument('--tissue_positions_file', default=None, help='') -parser.add_argument('--spatial_metadata', - default=None, - help='') -parser.add_argument('--spatial_transformation', - default=None, - help='') +#parser.add_argument('--spatial_metadata', +# default=None, +# help='') +#parser.add_argument('--spatial_transformation', +# default=None, +# help='') parser.set_defaults(verbose=True) args, opt = parser.parse_known_args() From d198b1150fa53c909d93fb65e14b0d025b09946c Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 11:41:44 +0200 Subject: [PATCH 07/57] add parameter=None for vizgen --- panpipes/funcs/io.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index 96b4dca2..f2edb8ef 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -157,24 +157,12 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): else: spatial_path = caf["spatial_path"][nn] if caf['spatial_filetype'][nn]=="vizgen": + spatial_counts = None + spatial_fullres_image_file = None + spatial_tissue_positions_file = None + spatial_scalefactors_file = None spatial_filetype = caf['spatial_filetype'][nn] - #path, counts and metadata are mandatory - #if pd.notna(caf["spatial_counts"][nn]): - # spatial_counts= caf["spatial_counts"][nn] - #else: - # spatial_counts = None - #if pd.notna(caf["spatial_metadata"][nn]): - # spatial_metadata = caf["spatial_metadata"][nn] - #else: - # spatial_metadata = None - #transformation is optional - #if pd.notna(caf["spatial_transformation"][nn]): - # spatial_transformation = caf["spatial_transformation"][nn] - #else: - # spatial_transformation = None elif caf['spatial_filetype'][nn]=="visium": - #spatial_metadata= None - #spatial_transformation = None spatial_filetype = caf['spatial_filetype'][nn] #counts file if pd.notna(caf["spatial_counts"][nn]): @@ -203,8 +191,6 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): spatial_fullres_image_file = None spatial_tissue_positions_file = None spatial_scalefactors_file = None - #spatial_metadata = None - #spatial_transformation = None if 'barcode_mtd_path' in caf.columns: cell_mtd_path = 
caf['barcode_mtd_path'][nn] #not yielding this right now! @@ -217,8 +203,9 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): else: outfile = outfile + ".h5mu" sample_id = caf['sample_id'][nn] + yield spatial_path, outfile, \ - sample_id, spatial_filetype, spatial_counts, spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file #spatial_metadata, spatial_transformation + sample_id, spatial_filetype, spatial_counts, spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file def read_anndata( From 2b4c01bf0043f2f49ab8e7ee76e8ec79294f5fc6 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 8 Oct 2024 11:47:46 +0200 Subject: [PATCH 08/57] change h5mu to zarr --- panpipes/funcs/io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index f2edb8ef..c6ff2c5c 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -199,11 +199,11 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): # create the output file outfile = "./tmp/" + caf['sample_id'][nn] if load_raw: - outfile = outfile + "_raw.h5mu" + outfile = outfile + "_raw.zarr" else: - outfile = outfile + ".h5mu" + outfile = outfile + ".zarr" sample_id = caf['sample_id'][nn] - + yield spatial_path, outfile, \ sample_id, spatial_filetype, spatial_counts, spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file From 459d07fc32640e6dfcdb958e3a1aacf9bb9fa6d2 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 15 Oct 2024 10:51:59 +0200 Subject: [PATCH 09/57] change from mudata to spatialdata --- panpipes/python_scripts/plot_qc_spatial.py | 43 ++++++++++--------- .../python_scripts/run_scanpyQC_spatial.py | 36 ++++++++-------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/panpipes/python_scripts/plot_qc_spatial.py b/panpipes/python_scripts/plot_qc_spatial.py index 6434b3b4..558219c0 100644 --- a/panpipes/python_scripts/plot_qc_spatial.py +++ b/panpipes/python_scripts/plot_qc_spatial.py @@ -13,6 +13,7 @@ import sys import logging import re +import spatialdata as sd L = logging.getLogger() L.setLevel(logging.INFO) log_handler = logging.StreamHandler(sys.stdout) @@ -57,15 +58,16 @@ sc.settings.figdir = figdir sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5)) -L.info("Reading in MuData from '%s'" % args.input_mudata) -mdata = mu.read(args.input_mudata) -spatial = mdata.mod['spatial'] +L.info("Reading in SpatialData from '%s'" % args.input_mudata) +sdata = sd.read_zarr(args.input_mudata) +#mdata = mu.read(args.input_mudata) +#spatial = mdata.mod['spatial'] input_data = os.path.basename(args.input_mudata) -pattern = r"_filtered.h5(.*)" +pattern = r"_filtered.zarr" match = re.search(pattern, input_data) if match is None: - match = re.search(r"_unfilt.h5(.*)", input_data) + match = re.search(r"_unfilt.zarr", input_data) sprefix = input_data[:match.start()] # convert string to list of strings @@ -74,15 +76,16 @@ # check if metrics in adata.obs or adata.var -qc_metrics = [metric if metric in spatial.obs.columns or metric in spatial.var.columns else L.warning("Variable '%s' not found in adata.var or adata.obs, will not be plotted" % metric) for metric in qc_metrics] +qc_metrics = [metric if metric in + sdata["table"].obs.columns or metric in sdata["table"].var.columns else L.warning("Variable '%s' not found in adata.var or adata.obs, will not be plotted" % metric) for metric in qc_metrics] qc_metrics = 
[metric for metric in qc_metrics if metric is not None] # check that group_vars are in adata.obs -group_var = [group if group in spatial.obs.columns else L.warning("group_var '%s' not found in adata.obs, will be ignored" % group) for group in group_var] +group_var = [group if group in sdata["table"].obs.columns else L.warning("group_var '%s' not found in adata.obs, will be ignored" % group) for group in group_var] group_var = [group for group in group_var if group is not None] # make sure that it's saved as categorical for group in group_var: - spatial.obs[group] = spatial.obs[group].astype("category") + sdata["table"].obs[group] = sdata["table"].obs[group].astype("category") if group_var == []: group_var = None @@ -93,34 +96,34 @@ for metric in qc_metrics: # check if in adata.obs: - if metric in spatial.obs.columns: + if metric in sdata["table"].obs.columns: # check that it's a numeric column, so that it can be plotted: - if metric not in spatial.obs._get_numeric_data().columns: + if metric not in sdata["table"].obs._get_numeric_data().columns: L.warning("Variable '%s' not numerical in adata.obs, will not be plotted" % metric) else: L.info("Creating violin plot for '%s' of .obs" % metric) if group_var is None: - sc.pl.violin(spatial, keys = metric, xlabel = metric+ " in .obs", + sc.pl.violin(sdata["table"], keys = metric, xlabel = metric+ " in .obs", save = "_obs_" + metric+ "_" + "."+sprefix + ".png", show = False) else: #plot violin for each group for group in group_var: - sc.pl.violin(spatial, keys = metric,groupby = group, xlabel = group + ", "+ metric+ " in .obs", + sc.pl.violin(sdata["table"], keys = metric,groupby = group, xlabel = group + ", "+ metric+ " in .obs", save = "_obs_" + metric+ "_" + group+ "."+sprefix +".png", show = False) #plot spatial L.info("Creating spatial embedding plot for '%s' of .obs" % metric) - sc.pl.embedding(spatial,basis="spatial", color = metric, save = "_spatial_" + metric + "."+sprefix +".png", show = False) + sc.pl.embedding(sdata["table"],basis="spatial", color = metric, save = "_spatial_" + metric + "."+sprefix +".png", show = False) #check if in adata.var: - if metric in spatial.var.columns: + if metric in sdata["table"].var.columns: - if metric not in spatial.var._get_numeric_data().columns: + if metric not in sdata["table"].var._get_numeric_data().columns: L.warning("Variable '%s' not numerical in adata.var, will not be plotted" % metric) else: # plot violins L.info("Creating violin plot for '%s' of .var" % metric) ax = sns.violinplot( - data=spatial.var[[metric]], + data=sdata["table"].var[[metric]], orient='vertical', ) ax.set(xlabel=metric+ " in .var" ) @@ -135,28 +138,28 @@ axs[0].set_title("Total transcripts per cell") sns.histplot( - spatial.obs["total_counts"], + sdata["table"].obs["total_counts"], kde=False, ax=axs[0], ) axs[1].set_title("Unique transcripts per cell") sns.histplot( - spatial.obs["n_genes_by_counts"], + sdata["table"].obs["n_genes_by_counts"], kde=False, ax=axs[1], ) axs[2].set_title("Transcripts per FOV") sns.histplot( - spatial.obs.groupby('fov')[['total_counts']].sum(), + sdata["table"].obs.groupby('fov')[['total_counts']].sum(), kde=False, ax=axs[2], ) axs[3].set_title("Volume of segmented cells") sns.histplot( - spatial.obs["volume"], + sdata["table"].obs["volume"], kde=False, ax=axs[3], ) diff --git a/panpipes/python_scripts/run_scanpyQC_spatial.py b/panpipes/python_scripts/run_scanpyQC_spatial.py index 3e3059c6..595d79da 100644 --- a/panpipes/python_scripts/run_scanpyQC_spatial.py +++ 
b/panpipes/python_scripts/run_scanpyQC_spatial.py @@ -18,6 +18,7 @@ import argparse import scanpy as sc import muon as mu +import spatialdata as sd from panpipes.funcs.io import write_obs @@ -64,14 +65,15 @@ sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5)) -L.info("Reading in MuData from '%s'" % args.input_anndata) +L.info("Reading in SpatialData from '%s'" % args.input_anndata) -mdata = mu.read(args.input_anndata) -spatial = mdata['spatial'] +#mdata = mu.read(args.input_anndata) +sdata = sd.read_zarr(args.input_anndata) +#spatial = mdata['spatial'] L.info("Spatial data is:") -print(spatial) -L.info("With sample id '%s'" % spatial.obs["sample_id"].unique()[0]) +print(sdata) +L.info("With sample id '%s'" % sdata["table"].obs["sample_id"].unique()[0]) qc_vars = [] @@ -94,7 +96,7 @@ for kk in calc_proportions: xname= kk gene_list = cat_dic[kk] - spatial.var[xname] = [x in gene_list for x in spatial.var_names] + sdata["table"].var[xname] = [x in gene_list for x in sdata["table"].var_names] qc_vars.append(xname) # Score genes @@ -105,7 +107,7 @@ L.info("Computing gene scores for '%s'" % kk) xname= kk gene_list = cat_dic[kk] - sc.tl.score_genes(spatial, gene_list , + sc.tl.score_genes(sdata["table"], gene_list , ctrl_size=min(len(gene_list), 50), gene_pool=None, n_bins=25, @@ -127,11 +129,11 @@ qc_info = " and calculating proportions for '%s'" % qc_vars L.info("Calculating QC metrics with scanpy.pp.calculate_qc_metrics()" + qc_info) percent_top = [50, 100, 200, 500] #default -percent_top = [x for x in percent_top if x <= spatial.n_vars] -sc.pp.calculate_qc_metrics(spatial, qc_vars=qc_vars, percent_top=percent_top, inplace=True) +percent_top = [x for x in percent_top if x <= sdata["table"].n_vars] +sc.pp.calculate_qc_metrics(sdata["table"], qc_vars=qc_vars, percent_top=percent_top, inplace=True) -if (args.spatial_filetype == "vizgen") and ("blank_genes" in spatial.obsm): - spatial.obsm["blank_genes"].to_numpy().sum() / spatial.var["total_counts"].sum() * 100 +if (args.spatial_filetype == "vizgen") and ("blank_genes" in sdata["table"].obsm): + sdata["table"].obsm["blank_genes"].to_numpy().sum() / sdata["table"].var["total_counts"].sum() * 100 # Calculate cc scores if args.ccgenes is not None: @@ -144,7 +146,7 @@ sgenes = ccgenes[ccgenes["cc_phase"] == "s"]["gene_name"].tolist() g2mgenes = ccgenes[ccgenes["cc_phase"] == "g2m"]["gene_name"].tolist() L.info("Calculating cell cycle scores") - sc.tl.score_genes_cell_cycle(spatial, s_genes=sgenes, g2m_genes=g2mgenes) + sc.tl.score_genes_cell_cycle(sdata["table"], s_genes=sgenes, g2m_genes=g2mgenes) else: L.error("The path of the cell cycle genes tsv file '%s' could not be found" % args.ccgenes) sys.exit("The path of the cell cycle genes tsv file '%s' could not be found" % args.ccgenes) @@ -153,15 +155,15 @@ #TODO: we now need to update the mdata object to pick the calc proportion outputs made on # spatial = mdata['spatial'] -mdata.update() +#mdata.update() single_id = os.path.basename(str(args.input_anndata)) single_id = single_id.replace("_raw.h5mu","") -L.info("Saving updated obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv") -write_obs(mdata, output_prefix=single_id, output_suffix="_cell_metadata.tsv") -L.info("Saving updated MuData to '%s'" % args.outfile) -mdata.write(args.outfile) +#L.info("Saving updated obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv") +#write_obs(mdata, output_prefix=single_id, output_suffix="_cell_metadata.tsv") +L.info("Saving updated SpatialData 
to '%s'" % args.outfile) +sdata.write(args.outfile) L.info("Done") From c3fd8d52ec4eb008c983ea9dc4cf3432eb38c2f4 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 15 Oct 2024 11:29:26 +0200 Subject: [PATCH 10/57] write obs spatialdata --- panpipes/python_scripts/run_scanpyQC_spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/python_scripts/run_scanpyQC_spatial.py b/panpipes/python_scripts/run_scanpyQC_spatial.py index 595d79da..57c35343 100644 --- a/panpipes/python_scripts/run_scanpyQC_spatial.py +++ b/panpipes/python_scripts/run_scanpyQC_spatial.py @@ -160,8 +160,8 @@ single_id = os.path.basename(str(args.input_anndata)) single_id = single_id.replace("_raw.h5mu","") -#L.info("Saving updated obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv") -#write_obs(mdata, output_prefix=single_id, output_suffix="_cell_metadata.tsv") +L.info("Saving updated obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv") +write_obs(sdata["table"], output_prefix=single_id, output_suffix="_cell_metadata.tsv") L.info("Saving updated SpatialData to '%s'" % args.outfile) sdata.write(args.outfile) From b8281663e282c0f762b5d15d638ef84fc02f402b Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 22 Oct 2024 11:59:59 +0200 Subject: [PATCH 11/57] adjust filtering to spatialdata --- panpipes/python_scripts/run_filter_spatial.py | 115 ++++++++++++------ 1 file changed, 79 insertions(+), 36 deletions(-) diff --git a/panpipes/python_scripts/run_filter_spatial.py b/panpipes/python_scripts/run_filter_spatial.py index 980f7070..c0734ba3 100644 --- a/panpipes/python_scripts/run_filter_spatial.py +++ b/panpipes/python_scripts/run_filter_spatial.py @@ -4,6 +4,7 @@ import re import muon as mu from anndata import AnnData +import spatialdata as sd import os # import scpipelines.funcs as scp from panpipes.funcs.processing import intersect_obs_by_mod, remove_unused_categories @@ -74,33 +75,73 @@ def test_matching_df_ignore_cat(new_df, old_df): # load mudata -L.info("Reading in MuData from '%s'" % args.input_mudata) +L.info("Reading in SpatialData from '%s'" % args.input_mudata) +sdata = sd.read_zarr(args.input_anndata) +#mdata = mu.read(args.input_mudata) -mdata = mu.read(args.input_mudata) +#if isinstance(mdata, AnnData): +# raise TypeError("Input '%s' should be of MuData format, not Anndata" % args.input_mudata) -if isinstance(mdata, AnnData): - raise TypeError("Input '%s' should be of MuData format, not Anndata" % args.input_mudata) +orig_obs = sdata["table"].obs.copy() -orig_obs = mdata.obs.copy() - -L.info("Before filtering: "+ str(mdata.n_obs) + " cells and " + str(mdata.n_vars) + " features") +L.info("Before filtering: "+ str(sdata["table"].n_obs) + " cells and " + str(sdata["table"].n_vars) + " features") # filter based on provided barcodes ----- if args.keep_barcodes is not None: - L.info("Filtering MuData by keep_barcodes file") + L.info("Filtering SpatialData by keep_barcodes file") keep_bc = pd.read_csv(args.keep_barcodes,header=None) - mdata = mdata[mdata.obs_names.isin(keep_bc[0]),:].copy() - remove_unused_categories(mdata.obs) - mdata.update() - L.info("Remaining cells: %d" % mdata.n_obs) + sdata["table"] = sdata["table"][sdata["table"].obs_names.isin(keep_bc[0]),:].copy() + remove_unused_categories(sdata["table"].obs) + #mdata.update() + L.info("Remaining cells: %d" % sdata["table"].n_obs) +# filter more than +if filter_dict['run']: + for marg in filter_dict["spatial"].keys(): + if marg == "obs": + if "max" in 
filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['max'].items(): + L.info("Filtering cells of modality '%s' by '%s' in .obs to less than %s" % ("spatial", col, n)) + mu.pp.filter_obs(sdata["table"], col, lambda x: x <= n) + L.info("Remaining cells: %d" % sdata["table"].n_obs) + if "min" in filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['min'].items(): + L.info("Filtering cells of modality '%s' by '%s' in .obs to more than %s" % ("spatial", col, n)) + mu.pp.filter_obs(sdata["table"], col, lambda x: x >= n) + L.info("Remaining cells: %d" % sdata["table"].n_obs) + if "bool" in filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['bool'].items(): + L.info("Filtering cells of modality '%s' by '%s' in .obs marked %s" % ("spatial", col, n)) + mu.pp.filter_obs(sdata["table"], col, lambda x: x == n) + L.info("Remaining cells: %d" % sdata["table"].n_obs) + if marg == "var": + if "max" in filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['max'].items(): + L.info("Filtering features of modality '%s' by '%s' in .var to less than %s" % ("spatial", col, n)) + mu.pp.filter_var(sdata["table"], col, lambda x: x <= n) + L.info("Remaining features: %d" % sdata["table"].n_vars) + + if "min" in filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['min'].items(): + L.info("Filtering features of modality '%s' by '%s' in .var to more than %s" % ("spatial", col, n)) + mu.pp.filter_var(sdata["table"], col, lambda x: x >= n) + L.info("Remaining features: %d" % sdata["table"].n_vars) + + if "bool" in filter_dict["spatial"][marg].keys(): + for col, n in filter_dict["spatial"][marg]['bool'].items(): + L.info("Filtering features of modality '%s' by '%s' in .var marked %s" % ("spatial", col, n)) + mu.pp.filter_var(sdata["table"], col, lambda x: x == n) + L.info("Remaining features: %d" % sdata["table"].n_vars) + + +''' # filter more than if filter_dict['run']: # this will go through the modalities one at a time, # then the categories max, min and bool - for mod in mdata.mod.keys(): + for mod in sdata["table"].mod.keys(): L.info(mod) if mod in filter_dict.keys(): for marg in filter_dict[mod].keys(): @@ -108,57 +149,59 @@ def test_matching_df_ignore_cat(new_df, old_df): if "max" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['max'].items(): L.info("Filtering cells of modality '%s' by '%s' in .obs to less than %s" % (mod, col, n)) - mu.pp.filter_obs(mdata.mod[mod], col, lambda x: x <= n) - L.info("Remaining cells: %d" % mdata[mod].n_obs) + mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x <= n) + L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) if "min" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['min'].items(): L.info("Filtering cells of modality '%s' by '%s' in .obs to more than %s" % (mod, col, n)) - mu.pp.filter_obs(mdata.mod[mod], col, lambda x: x >= n) - L.info("Remaining cells: %d" % mdata[mod].n_obs) + mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x >= n) + L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) if "bool" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['bool'].items(): L.info("Filtering cells of modality '%s' by '%s' in .obs marked %s" % (mod, col, n)) - mu.pp.filter_obs(mdata.mod[mod], col, lambda x: x == n) - L.info("Remaining cells: %d" % mdata[mod].n_obs) + mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x == n) + 
L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) if marg == "var": if "max" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['max'].items(): L.info("Filtering features of modality '%s' by '%s' in .var to less than %s" % (mod, col, n)) - mu.pp.filter_var(mdata.mod[mod], col, lambda x: x <= n) - L.info("Remaining features: %d" % mdata[mod].n_vars) + mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x <= n) + L.info("Remaining features: %d" % sdata["table"][mod].n_vars) if "min" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['min'].items(): L.info("Filtering features of modality '%s' by '%s' in .var to more than %s" % (mod, col, n)) - mu.pp.filter_var(mdata.mod[mod], col, lambda x: x >= n) - L.info("Remaining features: %d" % mdata[mod].n_vars) + mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x >= n) + L.info("Remaining features: %d" % sdata["table"][mod].n_vars) if "bool" in filter_dict[mod][marg].keys(): for col, n in filter_dict[mod][marg]['bool'].items(): L.info("Filtering features of modality '%s' by '%s' in .var marked %s" % (mod, col, n)) - mu.pp.filter_var(mdata.mod[mod], col, lambda x: x == n) - L.info("Remaining features: %d" % mdata[mod].n_vars) + mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x == n) + L.info("Remaining features: %d" % sdata["table"][mod].n_vars) +''' + -mdata.update() +#mdata.update() -L.info("After filtering: "+ str(mdata.n_obs) + " cells and " + str(mdata.n_vars) + " features") +L.info("After filtering: "+ str(sdata["table"].n_obs) + " cells and " + str(sdata["table"].n_vars) + " features") -remove_unused_categories(mdata.obs) +remove_unused_categories(sdata["table"].obs) # run quick test before saving out. -assert test_matching_df_ignore_cat(mdata.obs, orig_obs) +assert test_matching_df_ignore_cat(sdata["table"].obs, orig_obs) # write out obs -output_prefix = re.sub(".h5mu", "", os.path.basename(args.output_mudata)) +output_prefix = re.sub(".zarr", "", os.path.basename(args.output_mudata)) L.info("Saving updated obs in a metadata tsv file to './tables/" + output_prefix + "_filtered_cell_metadata.tsv'") -write_obs(mdata, output_prefix=os.path.join("tables/",output_prefix), output_suffix="_filtered_cell_metadata.tsv") +write_obs(sdata["table"], output_prefix=os.path.join("tables/",output_prefix), output_suffix="_filtered_cell_metadata.tsv") # write out the per sample_id cell numbers cell_counts_dict={} -for mm in mdata.mod.keys(): - cell_counts_dict[mm] = mdata[mm].obs.sample_id.value_counts().to_frame('n_cells') +#for mm in mdata.mod.keys(): +cell_counts_dict["spatial"] = sdata["table"].obs.sample_id.value_counts().to_frame('n_cells') cell_counts = pd.concat(cell_counts_dict).reset_index().rename( columns={"level_0": "modality", "level_1": "sample_id"}) @@ -167,10 +210,10 @@ def test_matching_df_ignore_cat(new_df, old_df): L.info("Saving cell counts in a metadata csv file to './tables/" + output_prefix + "_cell_counts.csv'") cell_counts.to_csv("tables/" + output_prefix + "_cell_counts.csv", index=None) -mdata.update() +#mdata.update() -L.info("Saving updated MuData to '%s'" % args.output_mudata) -mdata.write(args.output_mudata) +L.info("Saving updated SpatialData to '%s'" % args.output_mudata) +sdata.write(args.output_mudata) L.info("Done") From a3251d75e1238ce7eb9bd2bdf22253f97a37f72d Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 22 Oct 2024 12:00:10 +0200 Subject: [PATCH 12/57] exchange h5mu for zarr --- panpipes/panpipes/pipeline_preprocess_spatial.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/panpipes/pipeline_preprocess_spatial.py b/panpipes/panpipes/pipeline_preprocess_spatial.py index b4bf3f71..b7865b00 100644 --- a/panpipes/panpipes/pipeline_preprocess_spatial.py +++ b/panpipes/panpipes/pipeline_preprocess_spatial.py @@ -39,7 +39,7 @@ def gen_filter_jobs(): input_dir = "../qc.data" if not os.path.exists(input_dir): sys.exit("can't find input data") - input_paths=glob.glob(os.path.join(input_dir,"*unfilt.h5mu")) + input_paths=glob.glob(os.path.join(input_dir,"*unfilt.zarr")) for infile_path in input_paths: file_name = os.path.basename(infile_path) outfile = file_name.replace("unfilt","filtered") @@ -55,7 +55,7 @@ def gen_filter_jobs(): def filter_mudata(infile_path,outfile): print('processing file = %s' % str(infile_path)) log_file = os.path.basename(outfile) - log_file= "1_filtering."+log_file.replace("filtered.h5mu","") + ".log" + log_file= "1_filtering."+log_file.replace("filtered.zarr","") + ".log" filter_dict = dictionary_stripper(PARAMS['filtering']) From 5851feb4912a6820e14fc02a96b835cdf3d2f297 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 24 Oct 2024 16:04:53 +0200 Subject: [PATCH 13/57] bug fix --- panpipes/python_scripts/run_filter_spatial.py | 48 +------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/panpipes/python_scripts/run_filter_spatial.py b/panpipes/python_scripts/run_filter_spatial.py index c0734ba3..733c8cad 100644 --- a/panpipes/python_scripts/run_filter_spatial.py +++ b/panpipes/python_scripts/run_filter_spatial.py @@ -76,7 +76,7 @@ def test_matching_df_ignore_cat(new_df, old_df): # load mudata L.info("Reading in SpatialData from '%s'" % args.input_mudata) -sdata = sd.read_zarr(args.input_anndata) +sdata = sd.read_zarr(args.input_mudata) #mdata = mu.read(args.input_mudata) #if isinstance(mdata, AnnData): @@ -136,52 +136,6 @@ def test_matching_df_ignore_cat(new_df, old_df): L.info("Remaining features: %d" % sdata["table"].n_vars) -''' -# filter more than -if filter_dict['run']: - # this will go through the modalities one at a time, - # then the categories max, min and bool - for mod in sdata["table"].mod.keys(): - L.info(mod) - if mod in filter_dict.keys(): - for marg in filter_dict[mod].keys(): - if marg == "obs": - if "max" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['max'].items(): - L.info("Filtering cells of modality '%s' by '%s' in .obs to less than %s" % (mod, col, n)) - mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x <= n) - L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) - if "min" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['min'].items(): - L.info("Filtering cells of modality '%s' by '%s' in .obs to more than %s" % (mod, col, n)) - mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x >= n) - L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) - if "bool" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['bool'].items(): - L.info("Filtering cells of modality '%s' by '%s' in .obs marked %s" % (mod, col, n)) - mu.pp.filter_obs(sdata["table"].mod[mod], col, lambda x: x == n) - L.info("Remaining cells: %d" % sdata["table"][mod].n_obs) - if marg == "var": - if "max" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['max'].items(): - L.info("Filtering features of modality '%s' by '%s' in .var to less than %s" % (mod, col, n)) - mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x <= n) - L.info("Remaining 
features: %d" % sdata["table"][mod].n_vars) - - if "min" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['min'].items(): - L.info("Filtering features of modality '%s' by '%s' in .var to more than %s" % (mod, col, n)) - mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x >= n) - L.info("Remaining features: %d" % sdata["table"][mod].n_vars) - - if "bool" in filter_dict[mod][marg].keys(): - for col, n in filter_dict[mod][marg]['bool'].items(): - L.info("Filtering features of modality '%s' by '%s' in .var marked %s" % (mod, col, n)) - mu.pp.filter_var(sdata["table"].mod[mod], col, lambda x: x == n) - L.info("Remaining features: %d" % sdata["table"][mod].n_vars) - -''' - #mdata.update() From dc969bb8fede4b0a590b41ebd063b411a5ba6b57 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 24 Oct 2024 16:05:13 +0200 Subject: [PATCH 14/57] change mudata to spatialdata --- .../python_scripts/run_preprocess_spatial.py | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/panpipes/python_scripts/run_preprocess_spatial.py b/panpipes/python_scripts/run_preprocess_spatial.py index bdf28cf6..d4e83f89 100644 --- a/panpipes/python_scripts/run_preprocess_spatial.py +++ b/panpipes/python_scripts/run_preprocess_spatial.py @@ -8,6 +8,7 @@ import scanpy as sc import muon as mu import scanpy.experimental as sce +import spatialdata as sd import os import argparse @@ -88,9 +89,10 @@ sc.settings.figdir = figdir sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5)) -L.info("Reading in MuData from '%s'" % args.input_mudata) -mdata = mu.read(args.input_mudata) -spatial = mdata.mod['spatial'] +L.info("Reading in SpatialData from '%s'" % args.input_mudata) +sdata = sd.read_zarr(args.input_mudata) +#mdata = mu.read(args.input_mudata) +#spatial = mdata.mod['spatial'] input_data = os.path.basename(args.input_mudata) pattern = r"_filtered.h5(.*)" @@ -101,12 +103,12 @@ # check if raw data is available #maybe layer of raw data as parameter L.info("Checking if raw data is available") -if X_is_raw(spatial): +if X_is_raw(sdata["table"]): L.info("Saving raw counts from .X to .layers['raw_counts']") - spatial.layers['raw_counts'] = spatial.X.copy() -elif "raw_counts" in spatial.layers : + sdata["table"].layers['raw_counts'] = sdata["table"].X.copy() +elif "raw_counts" in sdata["table"].layers : L.info(".layers['raw_counts'] already exists and copying it to .X") - spatial.X = spatial.layers['raw_counts'].copy() + sdata["table"].X = sdata["table"].layers['raw_counts'].copy() else: L.error("X is not raw data and 'raw_counts' layer not found") sys.exit("X is not raw data and 'raw_counts' layer not found") @@ -116,24 +118,24 @@ if args.norm_hvg_flavour == "squidpy": if args.squidpy_hvg_flavour == "seurat_v3": L.info("Running HVG selection with flavor seurat_v3") - sc.pp.highly_variable_genes(spatial, flavor="seurat_v3", n_top_genes=int(args.n_top_genes), subset=args.filter_by_hvg, + sc.pp.highly_variable_genes(sdata["table"], flavor="seurat_v3", n_top_genes=int(args.n_top_genes), subset=args.filter_by_hvg, batch_key=args.hvg_batch_key) L.info("Log-normalizing data") - sc.pp.normalize_total(spatial) - sc.pp.log1p(spatial) + sc.pp.normalize_total(sdata["table"]) + sc.pp.log1p(sdata["table"]) else: L.info("Log-normalizing data") - sc.pp.normalize_total(spatial) - sc.pp.log1p(spatial) + sc.pp.normalize_total(sdata["table"]) + sc.pp.log1p(sdata["table"]) L.info("Running HVG selection with flavor %s" % args.squidpy_hvg_flavour) - 
sc.pp.highly_variable_genes(spatial, flavor=args.squidpy_hvg_flavour, + sc.pp.highly_variable_genes(sdata["table"], flavor=args.squidpy_hvg_flavour, min_mean=float(args.min_mean), max_mean=float(args.max_mean), min_disp=float(args.min_disp), subset=args.filter_by_hvg, batch_key=args.hvg_batch_key) L.info("Saving log-normalized counts to .layers['lognorm']") - spatial.layers["lognorm"] = spatial.X.copy() + sdata["table"].layers["lognorm"] = sdata["table"].X.copy() # plot HVGs: - sc.pl.highly_variable_genes(spatial, show=False, save="_genes_highlyvar" + "."+ sprefix+ ".png") + sc.pl.highly_variable_genes(sdata["table"], show=False, save="_genes_highlyvar" + "."+ sprefix+ ".png") elif args.norm_hvg_flavour == "seurat": if args.clip is None: @@ -145,35 +147,35 @@ else: clip = float(args.clip) L.info("Running Pearson Residuals HVG selection") - sce.pp.highly_variable_genes(spatial, theta=float(args.theta), clip=clip, n_top_genes=int(args.n_top_genes), + sce.pp.highly_variable_genes(sdata["table"], theta=float(args.theta), clip=clip, n_top_genes=int(args.n_top_genes), batch_key=args.hvg_batch_key, flavor='pearson_residuals', layer="raw_counts", subset=args.filter_by_hvg) L.info("Running Pearson Residuals normalization") - sce.pp.normalize_pearson_residuals(spatial, theta=float(args.theta), clip=clip, layer="raw_counts") + sce.pp.normalize_pearson_residuals(sdata["table"], theta=float(args.theta), clip=clip, layer="raw_counts") L.info("Saving log-normalized counts to .layers['norm_pearson_resid']") - spatial.layers["norm_pearson_resid"] = spatial.X.copy() + sdata["table"].layers["norm_pearson_resid"] = sdata["table"].X.copy() else: # error or warning? L.warning("No normalization and HVG selection was performed! To perform, please specify the 'norm_hvg_flavour' as either 'squidpy' or 'seurat'") -if "highly_variable" in spatial.var: - L.info("You have %s Highly Variable Features", np.sum(spatial.var.highly_variable)) +if "highly_variable" in sdata["table"].var: + L.info("You have %s Highly Variable Features", np.sum(sdata["table"].var.highly_variable)) #PCA L.info("Running PCA") -sc.pp.pca(spatial, n_comps=int(args.n_pcs), svd_solver='arpack', random_state=0) +sc.pp.pca(sdata["table"], n_comps=int(args.n_pcs), svd_solver='arpack', random_state=0) L.info("Plotting PCA") -sc.pl.pca(spatial, save = "_vars" + "."+ sprefix+".png") -sc.pl.pca_variance_ratio(spatial, log=True, n_pcs=int(args.n_pcs), save= "."+ sprefix+".png") +sc.pl.pca(sdata["table"], save = "_vars" + "."+ sprefix+".png") +sc.pl.pca_variance_ratio(sdata["table"], log=True, n_pcs=int(args.n_pcs), save= "."+ sprefix+".png") -mdata.update() -L.info("Saving updated MuData to '%s'" % args.output_mudata) -mdata.write(args.output_mudata) +#mdata.update() +L.info("Saving updated SpatialData to '%s'" % args.output_mudata) +sdata["table"].write(args.output_mudata) L.info("Done") From 8920cdeeefd9818918e195dbda328c69cfbd3c2a Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 24 Oct 2024 16:27:46 +0200 Subject: [PATCH 15/57] bug fix --- panpipes/panpipes/pipeline_qc_spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/panpipes/pipeline_qc_spatial.py b/panpipes/panpipes/pipeline_qc_spatial.py index c84ade7a..93cd2fa8 100644 --- a/panpipes/panpipes/pipeline_qc_spatial.py +++ b/panpipes/panpipes/pipeline_qc_spatial.py @@ -127,7 +127,7 @@ def load_mudatas(spatial_path, outfile, @follows(mkdir("qc.data")) @follows(mkdir("./figures")) @transform(load_mudatas, - regex("./tmp/(.*)_raw.h5(.*)"), + 
regex("./tmp/(.*)_raw.zarr"), r"./logs/2_spatialQC_\1.log") def spatialQC(infile,log_file): spatial_filetype = assays[infile] @@ -175,7 +175,7 @@ def run_plotqc_query(pqc_dict): @follows(mkdir("./figures/spatial")) @active_if(run_plotqc_query(PARAMS['plotqc'])) @transform(load_mudatas, - regex("./tmp/(.*)_raw.h5(.*)"), + regex("./tmp/(.*)_raw.zarr"), r"./logs/3_qcplot.\1.log") def plotQC_spatial(unfilt_file,log_file): spatial_filetype = assays[unfilt_file] From f1ac7857a379f19dbe17b5ac94f7bbfdfb9abdd8 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 24 Oct 2024 16:36:47 +0200 Subject: [PATCH 16/57] bug fix --- panpipes/panpipes/pipeline_preprocess_spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/panpipes/pipeline_preprocess_spatial.py b/panpipes/panpipes/pipeline_preprocess_spatial.py index b7865b00..d7a4053a 100644 --- a/panpipes/panpipes/pipeline_preprocess_spatial.py +++ b/panpipes/panpipes/pipeline_preprocess_spatial.py @@ -85,7 +85,7 @@ def run_plotqc_query(pqc_dict): @active_if(run_plotqc_query(PARAMS['plotqc'])) @active_if(PARAMS['filtering_run']) @transform(filter_mudata, - regex("./filtered.data/(.*)_filtered.h5(.*)"), + regex("./filtered.data/(.*)_filtered.zarr"), r"./logs/2_postfilterplot.\1.log") def postfilterplot_spatial(filt_file,log_file): print(filt_file) @@ -109,7 +109,7 @@ def postfilterplot_spatial(filt_file,log_file): @transform(filter_mudata, - regex("./filtered.data/(.*)_filtered.h5(.*)"), + regex("./filtered.data/(.*)_filtered.zarr"), r"./logs/3_preprocess.\1.log") def spatial_preprocess(filt_file,log_file): if os.path.exists("figures/spatial") is False: From cb165266440d06083fccf7dedf737202f4ac7800 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 24 Oct 2024 16:37:12 +0200 Subject: [PATCH 17/57] bug fixes --- panpipes/python_scripts/run_preprocess_spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/python_scripts/run_preprocess_spatial.py b/panpipes/python_scripts/run_preprocess_spatial.py index d4e83f89..5e389e04 100644 --- a/panpipes/python_scripts/run_preprocess_spatial.py +++ b/panpipes/python_scripts/run_preprocess_spatial.py @@ -95,7 +95,7 @@ #spatial = mdata.mod['spatial'] input_data = os.path.basename(args.input_mudata) -pattern = r"_filtered.h5(.*)" +pattern = r"_filtered.zarr" match = re.search(pattern, input_data) sprefix = input_data[:match.start()] @@ -175,7 +175,7 @@ #mdata.update() L.info("Saving updated SpatialData to '%s'" % args.output_mudata) -sdata["table"].write(args.output_mudata) +sdata.write(args.output_mudata) L.info("Done") From 24f035ac9707641c3fd79beb4287b2072b6529d0 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 7 Nov 2024 15:05:26 +0100 Subject: [PATCH 18/57] add vpt parameters --- panpipes/funcs/io.py | 63 +++++++++++++++--------- panpipes/panpipes/pipeline_qc_spatial.py | 37 ++++++++------ 2 files changed, 62 insertions(+), 38 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index c6ff2c5c..b50496da 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -157,40 +157,58 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): else: spatial_path = caf["spatial_path"][nn] if caf['spatial_filetype'][nn]=="vizgen": - spatial_counts = None - spatial_fullres_image_file = None - spatial_tissue_positions_file = None - spatial_scalefactors_file = None + visium_feature_bc_matrix = None + visium_fullres_image_file = None + visium_tissue_positions_file = None + visium_scalefactors_file = None 
spatial_filetype = caf['spatial_filetype'][nn] + if pd.notna(caf['vpt_cell_by_gene'][nn]): + vpt_cell_by_gene = caf['vpt_cell_by_gene'][nn] + else: + vpt_cell_by_gene = None + if pd.notna(caf['vpt_cell_metadata'][nn]): + vpt_cell_metadata = caf['vpt_cell_metadata'][nn] + else: + vpt_cell_metadata = None + if pd.notna(caf['vpt_cell_boundaries'][nn]): + vpt_cell_boundaries = caf['vpt_cell_boundaries'][nn] + else: + vpt_cell_boundaries = None elif caf['spatial_filetype'][nn]=="visium": + vpt_cell_by_gene = None + vpt_cell_metadata = None + vpt_cell_boundaries = None spatial_filetype = caf['spatial_filetype'][nn] #counts file - if pd.notna(caf["spatial_counts"][nn]): - spatial_counts= caf["spatial_counts"][nn] + if pd.notna(caf["visium_feature_bc_matrix"][nn]): + visium_feature_bc_matrix= caf["visium_feature_bc_matrix"][nn] else: - spatial_counts = None + visium_feature_bc_matrix = None # fullres image - if pd.notna(caf["spatial_fullres_image_file"][nn]): - spatial_fullres_image_file= caf["spatial_fullres_image_file"][nn] + if pd.notna(caf["visium_fullres_image_file"][nn]): + visium_fullres_image_file= caf["visium_fullres_image_file"][nn] else: - spatial_fullres_image_file = None + visium_fullres_image_file = None # tissue position - if pd.notna(caf["spatial_tissue_positions_file"][nn]): - spatial_tissue_positions_file= caf["spatial_tissue_positions_file"][nn] + if pd.notna(caf["visium_tissue_positions_file"][nn]): + visium_tissue_positions_file= caf["visium_tissue_positions_file"][nn] else: - spatial_tissue_positions_file = None + visium_tissue_positions_file = None # scalefactor - if pd.notna(caf["spatial_scalefactors_file"][nn]): - spatial_scalefactors_file= caf["spatial_scalefactors_file"][nn] + if pd.notna(caf["visium_scalefactors_file"][nn]): + visium_scalefactors_file= caf["visium_scalefactors_file"][nn] else: - spatial_scalefactors_file = None + visium_scalefactors_file = None else: spatial_path= None spatial_filetype = None - spatial_counts = None - spatial_fullres_image_file = None - spatial_tissue_positions_file = None - spatial_scalefactors_file = None + visium_feature_bc_matrix = None + visium_fullres_image_file = None + visium_tissue_positions_file = None + visium_scalefactors_file = None + vpt_cell_by_gene = None + vpt_cell_metadata = None + vpt_cell_boundaries = None if 'barcode_mtd_path' in caf.columns: cell_mtd_path = caf['barcode_mtd_path'][nn] #not yielding this right now! 
@@ -204,8 +222,9 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): outfile = outfile + ".zarr" sample_id = caf['sample_id'][nn] - yield spatial_path, outfile, \ - sample_id, spatial_filetype, spatial_counts, spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file + yield spatial_path, outfile, sample_id, spatial_filetype, \ + visium_feature_bc_matrix, visium_fullres_image_file, visium_tissue_positions_file, visium_scalefactors_file, \ + vpt_cell_by_gene, vpt_cell_metadata, vpt_cell_boundaries def read_anndata( diff --git a/panpipes/panpipes/pipeline_qc_spatial.py b/panpipes/panpipes/pipeline_qc_spatial.py index 93cd2fa8..3bc90556 100644 --- a/panpipes/panpipes/pipeline_qc_spatial.py +++ b/panpipes/panpipes/pipeline_qc_spatial.py @@ -74,8 +74,8 @@ def gen_load_spatial_anndata_jobs(): @follows(mkdir("tmp")) @files(gen_load_spatial_anndata_jobs) def load_mudatas(spatial_path, outfile, - sample_id, spatial_filetype, spatial_counts, - spatial_fullres_image_file, spatial_tissue_positions_file, spatial_scalefactors_file): + sample_id, spatial_filetype, visium_feature_bc_matrix, visium_fullres_image_file, visium_tissue_positions_file, visium_scalefactors_file, + vpt_cell_by_gene, vpt_cell_metadata, vpt_cell_boundaries): path_dict = {'spatial':spatial_path} @@ -83,17 +83,16 @@ def load_mudatas(spatial_path, outfile, print('sample_id = %s' % str(sample_id)) print('outfile = %s' % str(outfile)) print('spatial_filetype = %s' % str(spatial_filetype)) - #print('spatial_counts = %s' % str(spatial_counts)) - #if spatial_filetype == "vizgen": - # print('spatial_metadata = %s' % str(spatial_metadata)) - # print('spatial_transformation = %s' % str(spatial_transformation)) - #else: - # print("visium") + if spatial_filetype == "visium": - print('spatial_counts = %s' % str(spatial_counts)) - print('spatial_fullres_image_file= %s' % str(spatial_fullres_image_file)) - print('spatial_tissue_positions_file= %s' % str(spatial_tissue_positions_file)) - print('spatial_scalefactors_file= %s' % str(spatial_scalefactors_file)) + print('visium_feature_bc_matrix = %s' % str(visium_feature_bc_matrix)) + print('visium_fullres_image_file= %s' % str(visium_fullres_image_file)) + print('visium_tissue_positions_file= %s' % str(visium_tissue_positions_file)) + print('visium_scalefactors_file= %s' % str(visium_scalefactors_file)) + if spatial_filetype == "vizgen": + print('vpt_cell_by_gene = %s' % str(vpt_cell_by_gene)) + print('vpt_cell_metadata= %s' % str(vpt_cell_metadata)) + print('vpt_cell_boundaries= %s' % str(vpt_cell_boundaries)) modality_dict = {k:True if path_dict[k] is not None else False for k,v in {'spatial': True}.items() } print(modality_dict) @@ -109,10 +108,16 @@ def load_mudatas(spatial_path, outfile, """ if spatial_filetype == "visium": cmd += """ - --spatial_counts %(spatial_counts)s - --scalefactors_file %(spatial_scalefactors_file)s - --fullres_image_file %(spatial_fullres_image_file)s - --tissue_positions_file %(spatial_tissue_positions_file)s + --visium_feature_bc_matrix %(visium_feature_bc_matrix)s + --scalefactors_file %(visium_scalefactors_file)s + --fullres_image_file %(visium_fullres_image_file)s + --tissue_positions_file %(visium_tissue_positions_file)s + """ + if spatial_filetype == "vizgen": + cmd += """ + --vpt_cell_by_gene %(vpt_cell_by_gene)s + --vpt_cell_metadata %(vpt_cell_metadata)s + --vpt_cell_boundaries %(vpt_cell_boundaries)s """ cmd += " > logs/1_make_mudatas_%(sample_id)s.log" job_kwargs["job_threads"] = 
PARAMS['resources_threads_medium'] From ac41674d896ff14e2ff94773e7eb809976b27f88 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Thu, 7 Nov 2024 15:25:52 +0100 Subject: [PATCH 19/57] add vpt output parameter --- .../make_spatialData_from_csv.py | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index 81050809..d34c4822 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -11,6 +11,7 @@ import spatialdata_io as sd_io from mudata import MuData import os +from pathlib import Path """ this script copies the make_adata_from_csv.py that creates ONE MUDATA PER SAMPLE, with in each ONE LAYER per modality @@ -50,7 +51,7 @@ parser.add_argument('--spatial_filetype', default=None, help='') -parser.add_argument('--spatial_counts', +parser.add_argument('--visium_feature_bc_matrix', default=None, help='') parser.add_argument('--scalefactors_file', @@ -62,12 +63,15 @@ parser.add_argument('--tissue_positions_file', default=None, help='') -#parser.add_argument('--spatial_metadata', -# default=None, -# help='') -#parser.add_argument('--spatial_transformation', -# default=None, -# help='') +parser.add_argument('--vpt_cell_by_gene', + default=None, + help='') +parser.add_argument('--vpt_cell_metadata', + default=None, + help='') +parser.add_argument('--vpt_cell_boundaries', + default=None, + help='') parser.set_defaults(verbose=True) args, opt = parser.parse_known_args() @@ -84,10 +88,13 @@ all_files = { "spatial":[args.spatial_infile, #path args.spatial_filetype, #needed for the load_adata_in function to call one of vizgen,visium - args.spatial_counts, #name of the counts file, mandatory for squidpy + args.visium_feature_bc_matrix, #name of the counts file, mandatory for squidpy args.fullres_image_file, # visium args.tissue_positions_file, #visium - args.scalefactors_file]} # visium + args.scalefactors_file, + args.vpt_cell_by_gene, + args.vpt_cell_metadata, + args.vpt_cell_boundaries ]} # visium # args.spatial_metadata, #name of the metadata file, mandatory for squidpy # args.spatial_transformation]} #subset to the modalities we want from permf (in this case only spatial) @@ -130,26 +137,25 @@ def check_dir_transform(infile_path, transform_file): if args.spatial_filetype=="vizgen": L.info("Reading in Vizgen data with squidpy.read.vizgen() into AnnData from directory " + args.spatial_infile) - sdata = sd_io.merscope(path = args.spatial_infile) -# adata = sq.read.vizgen(path = args.spatial_infile, #path, mandatory for squidpy -# counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy -# meta_file = args.spatial_metadata, #name of the metadata file, mandatory for squidpy -# transformation_file=args.spatial_transformation, -# library_id = str(args.sample_id)) #this also has kwargs for read_10x_h5 but keep simple -# adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns = adata.uns["spatial"][str(args.sample_id)]["scalefactors"]["transformation_matrix"].columns.astype(str) + # check that all vpt parameters are not None + if None not in (args.vpt_cell_by_gene, args.vpt_cell_metadata, args.vpt_cell_boundaries): + vpt_outputs = {'cell_by_gene': Path(args.vpt_cell_by_gene) , + 'cell_metadata': Path(args.vpt_cell_metadata) , + 'cell_boundaries': Path(args.vpt_cell_boundaries)} + sdata = sd_io.merscope(path = args.spatial_infile, 
vpt_outputs=vpt_outputs) + else: + sdata = sd_io.merscope(path = args.spatial_infile) + elif args.spatial_filetype =="visium": L.info("Reading in Visium data with squidpy.read.visium() into AnnData from directory " + args.spatial_infile) sdata = sd_io.visium(path=args.spatial_infile, dataset_id=str(args.sample_id), - counts_file=args.spatial_counts, + counts_file=args.visium_feature_bc_matrix, fullres_image_file=args.fullres_image_file, tissue_positions_file=args.tissue_positions_file, scalefactors_file=args.scalefactors_file) - #adata = sq.read.visium(path = args.spatial_infile, #path, mandatory for squidpy - # counts_file=args.spatial_counts, #name of the counts file, mandatory for squidpy - # library_id = str(args.sample_id) - # ) #this also has kwargs for read_10x_h5 but keep simple - + + L.info("Resulting SpatialData is:") L.info(sdata) #L.info("Creating MuData with .mod['spatial']") From 9acc5d0a3b39eeb539fb720e9ba6001d898683a5 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 11 Nov 2024 11:35:55 +0100 Subject: [PATCH 20/57] add xenium --- panpipes/funcs/io.py | 9 +++++++++ panpipes/python_scripts/make_spatialData_from_csv.py | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index b50496da..af41d6ff 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -156,6 +156,15 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): spatial_filetype = None else: spatial_path = caf["spatial_path"][nn] + if caf['spatial_filetype'][nn]=="xenium": + spatial_filetype = caf['spatial_filetype'][nn] + visium_feature_bc_matrix = None + visium_fullres_image_file = None + visium_tissue_positions_file = None + visium_scalefactors_file = None + vpt_cell_by_gene = None + vpt_cell_metadata = None + vpt_cell_boundaries = None if caf['spatial_filetype'][nn]=="vizgen": visium_feature_bc_matrix = None visium_fullres_image_file = None diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index d34c4822..0d22a5c6 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -155,7 +155,9 @@ def check_dir_transform(infile_path, transform_file): tissue_positions_file=args.tissue_positions_file, scalefactors_file=args.scalefactors_file) - +elif args.spatial_filetype =="xenium": + sdata = sd_io.xenium(path = args.spatial_infile) + L.info("Resulting SpatialData is:") L.info(sdata) #L.info("Creating MuData with .mod['spatial']") From 8ab1570e057ee3a8fe8a72d80470d429d51460c6 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:06:05 +0100 Subject: [PATCH 21/57] adjust sample submission file --- .github/workflows/spatial_ingestion_visium-ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/spatial_ingestion_visium-ci.yml b/.github/workflows/spatial_ingestion_visium-ci.yml index 8fb57e05..a3bac8ca 100644 --- a/.github/workflows/spatial_ingestion_visium-ci.yml +++ b/.github/workflows/spatial_ingestion_visium-ci.yml @@ -78,12 +78,11 @@ jobs: - name: Preparing the submission file run: | cd spatial/ingestion - curl -o sample_file_qc_spatial.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/ingesting_visium_data/sample_file_qc_spatial.txt - + curl -o sample_file_qc_spatial.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt - 
name: Preparing the yaml file run: | cd spatial/ingestion - curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/ingesting_visium_data/pipeline.yml + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/pipeline.yml - name: File tree if: env.debug == 'true' From f3b8747b3ade6b9c851c105c55ce42325ede74d6 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:08:41 +0100 Subject: [PATCH 22/57] add spatialdata --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2dc39d84..b888fba6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,9 @@ spatial = [ "scipy==1.12.0", "squidpy", "cell2location", - "tangram-sc" + "tangram-sc", + "spatialdata", + "spatialdata-io" ] refmap_old = [ From 1937c1a63b6961162ab75e8726132d8cd823c6b0 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:14:34 +0100 Subject: [PATCH 23/57] change file name --- .github/workflows/spatial_ingestion_visium-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spatial_ingestion_visium-ci.yml b/.github/workflows/spatial_ingestion_visium-ci.yml index a3bac8ca..77da20f2 100644 --- a/.github/workflows/spatial_ingestion_visium-ci.yml +++ b/.github/workflows/spatial_ingestion_visium-ci.yml @@ -78,7 +78,7 @@ jobs: - name: Preparing the submission file run: | cd spatial/ingestion - curl -o sample_file_qc_spatial.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt + curl -o sample_file_qc_visium.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt - name: Preparing the yaml file run: | cd spatial/ingestion From 5d172140fbc123afad9ebfa6cff9c705935e3b4c Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:25:20 +0100 Subject: [PATCH 24/57] adjust submission file --- .github/workflows/spatial_ingestion_merfish-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/spatial_ingestion_merfish-ci.yml b/.github/workflows/spatial_ingestion_merfish-ci.yml index 89812404..a270bfbf 100644 --- a/.github/workflows/spatial_ingestion_merfish-ci.yml +++ b/.github/workflows/spatial_ingestion_merfish-ci.yml @@ -75,12 +75,12 @@ jobs: - name: Preparing the submission file run: | cd spatial/ingestion_merfish - curl -o sample_file_qc_spatial.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/ingesting_merfish_data/sample_file_qc_spatial.txt + curl -o sample_file_qc_merfish.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_merfish_data/sample_file_qc_merfish.txt - name: Preparing the yaml file run: | cd spatial/ingestion_merfish - curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/ingesting_merfish_data/pipeline.yml + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_merfish_data/pipeline.yml - name: File tree if: env.debug == 'true' From 72f5d5bd2db9c9d3ae19fe4cddea97c2427cdf48 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:33:54 +0100 Subject: [PATCH 25/57] change None to string --- panpipes/python_scripts/make_spatialData_from_csv.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index 0d22a5c6..69303ea4 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -138,7 +138,7 @@ def check_dir_transform(infile_path, transform_file): if args.spatial_filetype=="vizgen": L.info("Reading in Vizgen data with squidpy.read.vizgen() into AnnData from directory " + args.spatial_infile) # check that all vpt parameters are not None - if None not in (args.vpt_cell_by_gene, args.vpt_cell_metadata, args.vpt_cell_boundaries): + if "None" not in (args.vpt_cell_by_gene, args.vpt_cell_metadata, args.vpt_cell_boundaries): vpt_outputs = {'cell_by_gene': Path(args.vpt_cell_by_gene) , 'cell_metadata': Path(args.vpt_cell_metadata) , 'cell_boundaries': Path(args.vpt_cell_boundaries)} From afd637858b14b44b0472b99e3bd56ede5c1fb9c9 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 26 Nov 2024 11:42:47 +0100 Subject: [PATCH 26/57] rename data files --- .github/workflows/spatial_ingestion_merfish-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/spatial_ingestion_merfish-ci.yml b/.github/workflows/spatial_ingestion_merfish-ci.yml index a270bfbf..e0b8ae06 100644 --- a/.github/workflows/spatial_ingestion_merfish-ci.yml +++ b/.github/workflows/spatial_ingestion_merfish-ci.yml @@ -58,11 +58,11 @@ jobs: run: | mkdir spatial spatial/ingestion_merfish spatial/ingestion_merfish/data cd spatial/ingestion_merfish/data - curl -L -o datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_by_gene_S1R1.csv https://figshare.com/ndownloader/files/45028624 - curl -L -o datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_metadata_S1R1.csv https://figshare.com/ndownloader/files/45028621 + curl -L -o cell_by_gene.csv https://figshare.com/ndownloader/files/45028624 + curl -L -o cell_metadata.csv https://figshare.com/ndownloader/files/45028621 mkdir images cd images - curl -L -o datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_images_micron_to_mosaic_pixel_transform.csv https://figshare.com/ndownloader/files/45028645 + curl -L -o micron_to_mosaic_pixel_transform.csv https://figshare.com/ndownloader/files/45028645 # Note: we run the following to test that the commands works From 90ef6e605ff45f9e66f0777c90d401f4c7a911ef Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Sun, 1 Dec 2024 11:57:38 +0100 Subject: [PATCH 27/57] add ingest merscope action --- .../spatial_ingestion_merscope-ci.yml | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .github/workflows/spatial_ingestion_merscope-ci.yml diff --git a/.github/workflows/spatial_ingestion_merscope-ci.yml b/.github/workflows/spatial_ingestion_merscope-ci.yml new file mode 100644 index 00000000..0d10ac0d --- /dev/null +++ b/.github/workflows/spatial_ingestion_merscope-ci.yml @@ -0,0 +1,105 @@ +name: Run tutorials (spatial ingest merscope) + +on: + push: + branches: + - main + pull_request: + branches: + - main + +env: + debug: 'true' + +jobs: + spatial_ingest_merscope: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: File tree + if: env.debug == 'true' + run: tree + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + 
auto-activate-base: true + auto-update-conda: true + channels: conda-forge + channel-priority: strict + activate-environment: pipeline_env + environment-file: pipeline_env.yaml + + - name: Install Panpipes + shell: bash -el {0} + run: | + pip install .[spatial] + conda list + + - name: Conda info + if: env.debug == 'true' + shell: bash -el {0} + run: conda info + + - name: Conda list + if: env.debug == 'true' + shell: pwsh + run: conda list + + + - name: Preparing the data + run: | + mkdir spatial spatial/ingestion_merscope spatial/ingestion_merscope/data + cd spatial/ingestion_merscope/data + curl -L -o cell_by_gene.csv https://figshare.com/ndownloader/files/50899455 + curl -L -o cell_metadata.csv https://figshare.com/ndownloader/files/50899452 + curl -L -o cellpose_micron_space.parquet https://figshare.com/ndownloader/files/50899458 + curl -L -o detected_transcripts.csv https://figshare.com/ndownloader/files/50899476 + mkdir images + cd images + curl -L -o micron_to_mosaic_pixel_transform.csv https://figshare.com/ndownloader/files/50899449 + + + # Note: we run the following to test that the commands works + - name: Preparing the configuration file + shell: bash -el {0} + run: | + cd spatial/ingestion_merscope + panpipes qc_spatial config + + - name: Preparing the submission file + run: | + cd spatial/ingestion_merscope + curl -o sample_file_qc_merscope.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_merscope_data/sample_file_qc_merscope.txt + + - name: Preparing the yaml file + run: | + cd spatial/ingestion_merscope + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_merscope_data/pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion_merscope + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion_merscope + panpipes qc_spatial show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion_merscope + panpipes qc_spatial make full --local + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion_merscope From 5fef8cf3c5f7e7a6b3d5cd15e71718d6ed3d3f8f Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Sun, 1 Dec 2024 12:07:34 +0100 Subject: [PATCH 28/57] test to pin squidpy version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b888fba6..199bb99b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ spatial = [ "jaxlib==0.4.23", "scvi-tools==1.0.4", "scipy==1.12.0", - "squidpy", + "squidpy>1.6.1", "cell2location", "tangram-sc", "spatialdata", From 1f33c411ac3d16bee5675f62a90d261b460bfa57 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:05:40 +0100 Subject: [PATCH 29/57] remove detected transcripts --- .github/workflows/spatial_ingestion_merscope-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spatial_ingestion_merscope-ci.yml b/.github/workflows/spatial_ingestion_merscope-ci.yml index 0d10ac0d..3a2da28a 100644 --- a/.github/workflows/spatial_ingestion_merscope-ci.yml +++ b/.github/workflows/spatial_ingestion_merscope-ci.yml @@ -61,7 +61,7 @@ jobs: curl -L -o cell_by_gene.csv https://figshare.com/ndownloader/files/50899455 curl -L -o cell_metadata.csv https://figshare.com/ndownloader/files/50899452 curl -L -o cellpose_micron_space.parquet https://figshare.com/ndownloader/files/50899458 - curl 
-L -o detected_transcripts.csv https://figshare.com/ndownloader/files/50899476 + # curl -L -o detected_transcripts.csv https://figshare.com/ndownloader/files/50899476 mkdir images cd images curl -L -o micron_to_mosaic_pixel_transform.csv https://figshare.com/ndownloader/files/50899449 From 2f4392c4756320605cc59ca1382c8a7926976062 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:05:56 +0100 Subject: [PATCH 30/57] remove squidpy version pin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 199bb99b..b888fba6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ spatial = [ "jaxlib==0.4.23", "scvi-tools==1.0.4", "scipy==1.12.0", - "squidpy>1.6.1", + "squidpy", "cell2location", "tangram-sc", "spatialdata", From cf4ff073a8b074d7f7bae248bcacfb95b2b9fdeb Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:06:09 +0100 Subject: [PATCH 31/57] remove unnecessary imports --- panpipes/python_scripts/make_spatialData_from_csv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index 69303ea4..d6c85ac6 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -1,15 +1,15 @@ import argparse import yaml # import scanpy as sc -import pandas as pd +#import pandas as pd # import numpy as np # from scipy.sparse import csr_matrix -import muon as mu -import warnings -from muon._atac.tools import add_peak_annotation, locate_fragments -import squidpy as sq +#import muon as mu +#import warnings +#from muon._atac.tools import add_peak_annotation, locate_fragments +#import squidpy as sq import spatialdata_io as sd_io -from mudata import MuData +#from mudata import MuData import os from pathlib import Path """ From 124d849656e32163eb4657d2839d7d8e6d011ef2 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:06:22 +0100 Subject: [PATCH 32/57] add ingest xenium action --- .../workflows/spatial_ingestion_xenium.yml | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 .github/workflows/spatial_ingestion_xenium.yml diff --git a/.github/workflows/spatial_ingestion_xenium.yml b/.github/workflows/spatial_ingestion_xenium.yml new file mode 100644 index 00000000..0cd3fcdb --- /dev/null +++ b/.github/workflows/spatial_ingestion_xenium.yml @@ -0,0 +1,106 @@ +name: Run tutorials (spatial ingest xenium) + +on: + push: + branches: + - main + pull_request: + branches: + - main + +env: + debug: 'true' + +jobs: + spatial_ingest_xenium: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: File tree + if: env.debug == 'true' + run: tree + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + auto-activate-base: true + auto-update-conda: true + channels: conda-forge + channel-priority: strict + activate-environment: pipeline_env + environment-file: pipeline_env.yaml + + - name: Install Panpipes + shell: bash -el {0} + run: | + pip install .[spatial] + conda list + + - name: Conda info + if: env.debug == 'true' + shell: bash -el {0} + run: conda info + + - name: Conda list + if: env.debug == 'true' + shell: pwsh + run: conda list + + + - name: Preparing the data + run: | + mkdir spatial 
spatial/ingestion_xenium spatial/ingestion_xenium/data + cd spatial/ingestion_xenium/data + curl -L -o experiment.xenium https://figshare.com/ndownloader/files/51243614 + curl -L -o nucleus_boundaries.parquet https://figshare.com/ndownloader/files/51243605 + curl -L -o cell_boundaries.parquet https://figshare.com/ndownloader/files/51243596 + curl -L -o transcripts.parquet https://figshare.com/ndownloader/files/51243608 + curl -L -o cell_feature_matrix.h5 https://figshare.com/ndownloader/files/51243599 + curl -L -o cells.parquet https://figshare.com/ndownloader/files/51243620 + curl -L -o morphology_mip.ome.tif https://figshare.com/ndownloader/files/51243623 + curl -L -o morphology_focus.ome.tif https://figshare.com/ndownloader/files/51243626 + + + # Note: we run the following to test that the commands works + - name: Preparing the configuration file + shell: bash -el {0} + run: | + cd spatial/ingestion_xenium + panpipes qc_spatial config + + - name: Preparing the submission file + run: | + cd spatial/ingestion_xenium + curl -o sample_file_qc_xenium.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_xenium_data/sample_file_qc_xenium.txt + + - name: Preparing the yaml file + run: | + cd spatial/ingestion_xenium + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_xenium_data/pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion_xenium + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion_xenium + panpipes qc_spatial show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion_xenium + panpipes qc_spatial make full --local + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion_xenium From a79bc580134a9f312a5a275f30f92d4b7f8ae16d Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:32:53 +0100 Subject: [PATCH 33/57] add cells.zarr file --- .github/workflows/spatial_ingestion_xenium.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/spatial_ingestion_xenium.yml b/.github/workflows/spatial_ingestion_xenium.yml index 0cd3fcdb..b33b7f93 100644 --- a/.github/workflows/spatial_ingestion_xenium.yml +++ b/.github/workflows/spatial_ingestion_xenium.yml @@ -66,6 +66,7 @@ jobs: curl -L -o cells.parquet https://figshare.com/ndownloader/files/51243620 curl -L -o morphology_mip.ome.tif https://figshare.com/ndownloader/files/51243623 curl -L -o morphology_focus.ome.tif https://figshare.com/ndownloader/files/51243626 + curl -L -o cells.zarr.zip https://figshare.com/ndownloader/files/51244049 # Note: we run the following to test that the commands works From 218f437676f33376cab63ee44bb805b12af76432 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:41:32 +0100 Subject: [PATCH 34/57] add morphology focus folder --- .github/workflows/spatial_ingestion_xenium.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spatial_ingestion_xenium.yml b/.github/workflows/spatial_ingestion_xenium.yml index b33b7f93..58ab69de 100644 --- a/.github/workflows/spatial_ingestion_xenium.yml +++ b/.github/workflows/spatial_ingestion_xenium.yml @@ -65,8 +65,10 @@ jobs: curl -L -o cell_feature_matrix.h5 https://figshare.com/ndownloader/files/51243599 curl -L -o cells.parquet https://figshare.com/ndownloader/files/51243620 curl -L -o morphology_mip.ome.tif 
https://figshare.com/ndownloader/files/51243623 - curl -L -o morphology_focus.ome.tif https://figshare.com/ndownloader/files/51243626 curl -L -o cells.zarr.zip https://figshare.com/ndownloader/files/51244049 + mkdir morphology_focus + cd morphology_focus + curl -L -o morphology_focus.ome.tif https://figshare.com/ndownloader/files/51243626 # Note: we run the following to test that the commands works From 89a19bfb44f99865e85362906a02d2d8079dd500 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 12:41:49 +0100 Subject: [PATCH 35/57] adjust spatial preprocessing to spatialData --- .github/workflows/spatial_preprocess-ci.yml | 60 +++++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/.github/workflows/spatial_preprocess-ci.yml b/.github/workflows/spatial_preprocess-ci.yml index f9a6123f..fee80bce 100644 --- a/.github/workflows/spatial_preprocess-ci.yml +++ b/.github/workflows/spatial_preprocess-ci.yml @@ -53,19 +53,62 @@ jobs: shell: pwsh run: conda list - - name: Preparing the data run: | - mkdir spatial spatial/preprocess spatial/preprocess/data - cd spatial/preprocess/data - - curl -L -o V1_Human_Heart_unfilt.h5mu https://figshare.com/ndownloader/files/45031048 - curl -L -o V1_Human_Lymph_Node_unfilt.h5mu https://figshare.com/ndownloader/files/45031051 + mkdir spatial spatial/ingestion spatial/ingestion/data + cd spatial/ingestion/data + mkdir V1_Human_Heart V1_Human_Lymph_Node + cd V1_Human_Heart + curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_filtered_feature_bc_matrix.h5 + curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_spatial.tar.gz + tar -xf V1_Human_Heart_spatial.tar.gz + cd ../V1_Human_Lymph_Node + curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5 + curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_spatial.tar.gz + tar -xf V1_Human_Lymph_Node_spatial.tar.gz # Note: we run the following to test that the commands works - name: Preparing the configuration file shell: bash -el {0} run: | + cd spatial/ingestion + panpipes qc_spatial config + + - name: Preparing the submission file + run: | + cd spatial/ingestion + curl -o sample_file_qc_visium.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt + - name: Preparing the yaml file + run: | + cd spatial/ingestion + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion + panpipes qc_spatial show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd spatial/ingestion + panpipes qc_spatial make full --local + + - name: File tree + if: env.debug == 'true' + run: tree spatial/ingestion + + + # Note: we run the following to test that the commands works + - name: Preparing the configuration file + shell: bash -el {0} + run: | + mkdir spatial/preprocess cd spatial/preprocess panpipes preprocess_spatial config @@ -74,11 +117,6 @@ jobs: cd spatial/preprocess curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/preprocess_spatial_data/pipeline.yml - - name: Replace template contents in 
configuration file - run: | - cd spatial/preprocess - sed -i 's+../ingestion/qc.data/+./data/+g' pipeline.yml - - name: File tree if: env.debug == 'true' run: tree spatial/preprocess From 5fa72b0493d81cf933a33a072873c84da8653a7f Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 13:02:53 +0100 Subject: [PATCH 36/57] adjust file name --- .github/workflows/spatial_ingestion_xenium.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spatial_ingestion_xenium.yml b/.github/workflows/spatial_ingestion_xenium.yml index 58ab69de..87382353 100644 --- a/.github/workflows/spatial_ingestion_xenium.yml +++ b/.github/workflows/spatial_ingestion_xenium.yml @@ -68,7 +68,7 @@ jobs: curl -L -o cells.zarr.zip https://figshare.com/ndownloader/files/51244049 mkdir morphology_focus cd morphology_focus - curl -L -o morphology_focus.ome.tif https://figshare.com/ndownloader/files/51243626 + curl -L -o morphology_focus_0000.ome.tif https://figshare.com/ndownloader/files/51243626 # Note: we run the following to test that the commands works From 7e04d41dd39ce95c49bae24f85e3956dc0b9763c Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 17 Dec 2024 13:11:44 +0100 Subject: [PATCH 37/57] xenium adjust figshare links --- .github/workflows/spatial_ingestion_xenium.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/spatial_ingestion_xenium.yml b/.github/workflows/spatial_ingestion_xenium.yml index 87382353..366beb3d 100644 --- a/.github/workflows/spatial_ingestion_xenium.yml +++ b/.github/workflows/spatial_ingestion_xenium.yml @@ -58,17 +58,17 @@ jobs: run: | mkdir spatial spatial/ingestion_xenium spatial/ingestion_xenium/data cd spatial/ingestion_xenium/data - curl -L -o experiment.xenium https://figshare.com/ndownloader/files/51243614 - curl -L -o nucleus_boundaries.parquet https://figshare.com/ndownloader/files/51243605 - curl -L -o cell_boundaries.parquet https://figshare.com/ndownloader/files/51243596 - curl -L -o transcripts.parquet https://figshare.com/ndownloader/files/51243608 - curl -L -o cell_feature_matrix.h5 https://figshare.com/ndownloader/files/51243599 - curl -L -o cells.parquet https://figshare.com/ndownloader/files/51243620 - curl -L -o morphology_mip.ome.tif https://figshare.com/ndownloader/files/51243623 - curl -L -o cells.zarr.zip https://figshare.com/ndownloader/files/51244049 + curl -L -o experiment.xenium https://figshare.com/ndownloader/files/51244265 + curl -L -o nucleus_boundaries.parquet https://figshare.com/ndownloader/files/51244286 + curl -L -o cell_boundaries.parquet https://figshare.com/ndownloader/files/51244244 + curl -L -o transcripts.parquet https://figshare.com/ndownloader/files/51244283 + curl -L -o cell_feature_matrix.h5 https://figshare.com/ndownloader/files/51244247 + curl -L -o cells.parquet https://figshare.com/ndownloader/files/51244259 + curl -L -o morphology_mip.ome.tif https://figshare.com/ndownloader/files/51244415 + curl -L -o cells.zarr.zip https://figshare.com/ndownloader/files/51244262 mkdir morphology_focus cd morphology_focus - curl -L -o morphology_focus_0000.ome.tif https://figshare.com/ndownloader/files/51243626 + curl -L -o morphology_focus_0000.ome.tif https://figshare.com/ndownloader/files/51244277 # Note: we run the following to test that the commands works From 1e002b5532b82fe4b27170888369cfc9f6b82d8e Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 14 Jan 2025 10:54:26 +0100 Subject: [PATCH 38/57] update from mudata to spatialdata --- 
.../pipeline_deconvolution_spatial.py | 6 +- panpipes/python_scripts/run_cell2location.py | 95 ++++++++++--------- panpipes/python_scripts/run_tangram.py | 49 +++++----- 3 files changed, 78 insertions(+), 72 deletions(-) diff --git a/panpipes/panpipes/pipeline_deconvolution_spatial.py b/panpipes/panpipes/pipeline_deconvolution_spatial.py index 41e60dde..e7026dc4 100644 --- a/panpipes/panpipes/pipeline_deconvolution_spatial.py +++ b/panpipes/panpipes/pipeline_deconvolution_spatial.py @@ -30,12 +30,12 @@ def get_logger(): def gen_filter_jobs(): - input_paths_spatial=glob.glob(os.path.join(PARAMS["input_spatial"],"*.h5mu")) + input_paths_spatial=glob.glob(os.path.join(PARAMS["input_spatial"],"*.zarr")) input_singlecell = PARAMS["input_singlecell"] for input_spatial in input_paths_spatial: sample_prefix = os.path.basename(input_spatial) - sample_prefix = sample_prefix.replace(".h5mu","") - outfile_spatial = "cell2location.output/" + sample_prefix + "/Cell2Loc_spatial_output.h5mu" + sample_prefix = sample_prefix.replace(".zarr","") + outfile_spatial = "cell2location.output/" + sample_prefix + "/Cell2Loc_spatial_output.zarr" yield input_spatial, outfile_spatial, sample_prefix, input_singlecell diff --git a/panpipes/python_scripts/run_cell2location.py b/panpipes/python_scripts/run_cell2location.py index 3bd74d6c..f262b491 100644 --- a/panpipes/python_scripts/run_cell2location.py +++ b/panpipes/python_scripts/run_cell2location.py @@ -7,7 +7,7 @@ import cell2location as c2l import scanpy as sc import pandas as pd -import muon as mu +import spatialdata as sd import os import argparse @@ -20,6 +20,7 @@ from panpipes.funcs.scmethods import cell2loc_filter_genes + L = logging.getLogger() L.setLevel(logging.INFO) log_handler = logging.StreamHandler(sys.stdout) @@ -197,13 +198,15 @@ #1. 
read in the data #spatial: -L.info("Reading in spatial MuData from '%s'" % args.input_spatial) -mdata_spatial = mu.read(args.input_spatial) -adata_st = mdata_spatial.mod['spatial'] +L.info("Reading in spatial SpatialData from '%s'" % args.input_spatial) +sdata_st = sd.read_zarr(args.input_spatial) +#mdata_spatial = mu.read(args.input_spatial) +#adata_st = mdata_spatial.mod['spatial'] #single-cell: -L.info("Reading in reference MuData from '%s'" % args.input_singlecell) -mdata_singlecell = mu.read(args.input_singlecell) -adata_sc = mdata_singlecell.mod['rna'] +L.info("Reading in reference SpatialData from '%s'" % args.input_singlecell) +sdata_sc = sd.read_zarr(args.input_singlecell) +#mdata_singlecell = mu.read(args.input_singlecell) +#adata_sc = mdata_singlecell.mod['rna'] @@ -217,12 +220,12 @@ reduced_gene_set = pd.read_csv(args.gene_list, header = 0) reduced_gene_set.columns = ["HVGs"] L.info("Subsetting data on gene list") - adata_sc.var["selected_gene"] = adata_sc.var.index.isin(reduced_gene_set["HVGs"]) - adata_st.var["selected_gene"] = adata_st.var.index.isin(reduced_gene_set["HVGs"]) - adata_sc = adata_sc[:, adata_sc.var["selected_gene"]] - adata_st = adata_st[:, adata_st.var["selected_gene"]] + sdata_sc["table"].var["selected_gene"] = sdata_sc["table"].var.index.isin(reduced_gene_set["HVGs"]) + sdata_st["table"].var["selected_gene"] = sdata_st["table"].var.index.isin(reduced_gene_set["HVGs"]) + sdata_sc["table"] = sdata_sc["table"][:, sdata_sc["table"].var["selected_gene"]] + sdata_st["table"] = sdata_st["table"][:, sdata_st["table"].var["selected_gene"]] # check whether all genes are present in both, spatial & reference - if set(adata_st.var.index) != set(adata_sc.var.index): + if set(sdata_st["table"].var.index) != set(sdata_sc["table"].var.index): L.error( "Not all genes of the gene list %s are present in the reference as well as in the ST data. 
Please provide a gene list where all genes are present in both, reference and ST.", args.gene_list) sys.exit( @@ -231,34 +234,34 @@ else: # perform feature selection according to cell2loc if remove_mt is True: L.info("Removing MT genes") - adata_st.var["MT_gene"] = [gene.startswith("MT-") for gene in adata_st.var.index] - adata_st.obsm["MT"] = adata_st[:, adata_st.var["MT_gene"].values].X.toarray() - adata_st = adata_st[:, ~adata_st.var["MT_gene"].values] + sdata_st["table"].var["MT_gene"] = [gene.startswith("MT-") for gene in sdata_st["table"].var.index] + sdata_st["table"].obsm["MT"] = sdata_st["table"][:, sdata_st["table"].var["MT_gene"].values].X.toarray() + sdata_st["table"] = sdata_st["table"][:, ~sdata_st["table"].var["MT_gene"].values] # intersect vars of reference and spatial L.info("Intersecting vars of reference and spatial ") - shared_features = [feature for feature in adata_st.var_names if feature in adata_sc.var_names] - adata_sc = adata_sc[:, shared_features] - adata_st = adata_st[:, shared_features] + shared_features = [feature for feature in sdata_st["table"].var_names if feature in sdata_sc["table"].var_names] + sdata_sc["table"] = sdata_sc["table"][:, shared_features] + sdata_st["table"] = sdata_st["table"][:, shared_features] # select features L.info("Selecting features using 'cell2location.utils.filtering.filter_genes() function'") - selected = cell2loc_filter_genes(adata_sc, figdir + "/gene_filter.png", cell_count_cutoff=float(args.cell_count_cutoff), + selected = cell2loc_filter_genes(sdata_sc["table"], figdir + "/gene_filter.png", cell_count_cutoff=float(args.cell_count_cutoff), cell_percentage_cutoff2=float(args.cell_percentage_cutoff2), nonz_mean_cutoff=float(args.nonz_mean_cutoff)) L.info("Subsetting data on selected features") - adata_sc = adata_sc[:, selected] - adata_st = adata_st[:, selected] + sdata_sc["table"] = sdata_sc["table"][:, selected] + sdata_st["table"] = sdata_st["table"][:, selected] # 3. 
Fit regression model L.info("Setting up AnnData for the reference model") -c2l.models.RegressionModel.setup_anndata(adata=adata_sc, +c2l.models.RegressionModel.setup_anndata(adata=sdata_sc["table"], labels_key = args.labels_key_reference, layer= args.layer_reference, batch_key= args.batch_key_reference, categorical_covariate_keys = categorical_covariate_keys_reference, continuous_covariate_keys = continuous_covariate_keys_reference) -model_ref = c2l.models.RegressionModel(adata_sc) +model_ref = c2l.models.RegressionModel(sdata_sc["table"]) L.info("Training the reference model") model_ref.train(max_epochs=max_epochs_reference, use_gpu = use_gpu_reference) @@ -268,23 +271,23 @@ # export results L.info("Extracting the posterior of the reference model") -adata_sc = model_ref.export_posterior(adata_sc) -if "means_per_cluster_mu_fg" in adata_sc.varm.keys(): - inf_aver = adata_sc.varm["means_per_cluster_mu_fg"][[f"means_per_cluster_mu_fg_{i}" for i in adata_sc.uns["mod"]["factor_names"]]].copy() +sdata_sc["table"] = model_ref.export_posterior(sdata_sc["table"]) +if "means_per_cluster_mu_fg" in sdata_sc["table"].varm.keys(): + inf_aver = sdata_sc["table"].varm["means_per_cluster_mu_fg"][[f"means_per_cluster_mu_fg_{i}" for i in sdata_sc["table"].uns["mod"]["factor_names"]]].copy() else: - inf_aver = adata_sc.var[[f"means_per_cluster_mu_fg_{i}" for i in adata_sc.uns["mod"]["factor_names"]]].copy() -inf_aver.columns = adata_sc.uns["mod"]["factor_names"] + inf_aver = sdata_sc["table"].var[[f"means_per_cluster_mu_fg_{i}" for i in sdata_sc["table"].uns["mod"]["factor_names"]]].copy() +inf_aver.columns = sdata_sc["table"].uns["mod"]["factor_names"] inf_aver.to_csv(output_dir+"/Cell2Loc_inf_aver.csv") # plot QC L.info("Plotting QC plots") cell2loc_plot_QC_reference(model_ref, figdir + "/QC_reference_reconstruction_accuracy.png", figdir + "/QC_reference_expression signatures_vs_avg_expression.png") -# save model and update mudata -if adata_sc.var.index.names[0] in adata_sc.var.columns: - adata_sc.var.index.names = [None] -mdata_singlecell.mod["rna"] = adata_sc -mdata_singlecell.update() +# save model +if sdata_sc["table"].var.index.names[0] in sdata_sc["table"].var.columns: + sdata_sc["table"].var.index.names = [None] +#mdata_singlecell.mod["rna"] = adata_sc +#mdata_singlecell.update() if save_models is True: L.info("Saving reference model to '%s'" % output_dir) model_ref.save(output_dir +"/Reference_model", overwrite=True) @@ -293,7 +296,7 @@ # 4. 
Fit mapping model L.info("Setting up AnnData for the spatial model") -c2l.models.Cell2location.setup_anndata(adata=adata_st, +c2l.models.Cell2location.setup_anndata(adata=sdata_st["table"], labels_key = args.labels_key_st, layer= args.layer_st, batch_key= args.batch_key_st, @@ -301,7 +304,7 @@ continuous_covariate_keys = continuous_covariate_keys_st) -model_spatial = c2l.models.Cell2location(adata = adata_st, cell_state_df=inf_aver, +model_spatial = c2l.models.Cell2location(adata = sdata_st["table"], cell_state_df=inf_aver, N_cells_per_location=float(args.N_cells_per_location), detection_alpha=float(args.detection_alpha)) L.info("Training the spatial model") @@ -312,7 +315,7 @@ cell2loc_plot_history(model_spatial, figdir + "/ELBO_spatial_model.png") #extract posterior L.info("Extracting the posterior of the spatial model") -adata_st = model_spatial.export_posterior(adata_st) +sdata_st["table"] = model_spatial.export_posterior(sdata_st["table"]) #plot QC L.info("Plotting QC plots") cell2loc_plot_QC_reconstr(model_spatial, figdir + "/QC_spatial_reconstruction_accuracy.png") @@ -320,24 +323,24 @@ #plot output L.info("Plotting spatial embedding plot coloured by 'q05_cell_abundance_w_sf'") -adata_st.obs[adata_st.uns["mod"]["factor_names"]] = adata_st.obsm["q05_cell_abundance_w_sf"] -sc.pl.spatial(adata_st,color=adata_st.uns["mod"]["factor_names"], show = False, save = "_Cell2Loc_q05_cell_abundance_w_sf.png") +sdata_st["table"].obs[sdata_st["table"].uns["mod"]["factor_names"]] = sdata_st["table"].obsm["q05_cell_abundance_w_sf"] +sc.pl.spatial(sdata_st["table"],color=sdata_st["table"].uns["mod"]["factor_names"], show = False, save = "_Cell2Loc_q05_cell_abundance_w_sf.png") -# save model and update mudata -if adata_st.var.index.names[0] in adata_st.var.columns: - adata_st.var.index.names = [None] -mdata_spatial.mod["spatial"] = adata_st -mdata_spatial.update() +# save model +if sdata_st["table"].var.index.names[0] in sdata_st["table"].var.columns: + sdata_st["table"].var.index.names = [None] +#mdata_spatial.mod["spatial"] = adata_st +#mdata_spatial.update() if save_models is True: L.info("Saving spatial model to '%s'" % output_dir) model_spatial.save(output_dir+"/Spatial_mapping_model", overwrite=True) #6. save mudatas -L.info("Saving MuDatas to '%s'" % output_dir) -mdata_singlecell.write(output_dir+"/Cell2Loc_screference_output.h5mu") -mdata_spatial.write(output_dir+"/Cell2Loc_spatial_output.h5mu") +L.info("Saving SpatialDatas to '%s'" % output_dir) +sdata_sc.write(output_dir+"/Cell2Loc_screference_output.zarr") +sdata_st.write(output_dir+"/Cell2Loc_spatial_output.zarr") L.info("Done") diff --git a/panpipes/python_scripts/run_tangram.py b/panpipes/python_scripts/run_tangram.py index 6b2cc6a2..6cbe68e7 100644 --- a/panpipes/python_scripts/run_tangram.py +++ b/panpipes/python_scripts/run_tangram.py @@ -9,6 +9,7 @@ import scanpy as sc import tangram as tg import muon as mu +import spatialdata as sd import os import argparse @@ -100,13 +101,15 @@ #1. 
read in the data #spatial: -L.info("Reading in spatial MuData from '%s'" % args.input_spatial) -mdata_spatial = mu.read(args.input_spatial) -adata_st = mdata_spatial.mod['spatial'] +L.info("Reading in spatial SpatialData from '%s'" % args.input_spatial) +sdata_st = sd.read_zarr(args.input_spatial) +#mdata_spatial = mu.read(args.input_spatial) +#adata_st = mdata_spatial.mod['spatial'] #single-cell: -L.info("Reading in reference MuData from '%s'" % args.input_singlecell) -mdata_singlecell = mu.read(args.input_singlecell) -adata_sc = mdata_singlecell.mod['rna'] +L.info("Reading in reference SpatialData from '%s'" % args.input_singlecell) +sdata_sc = sd.read_zarr(args.input_singlecell) +#mdata_singlecell = mu.read(args.input_singlecell) +#adata_sc = mdata_singlecell.mod['rna'] #2. Perform gene selection: @@ -121,43 +124,43 @@ else: # perform feature selection using sc.tl.rank_genes_groups() L.info("Running 'scanpy.tl.rank_genes_groups()'") - sc.tl.rank_genes_groups(adata_sc, groupby=args.labels_key_rank_genes, layer=args.layer_rank_genes, method=args.method_rank_genes,corr_method = args.corr_method_rank_genes) + sc.tl.rank_genes_groups(sdata_sc["table"], groupby=args.labels_key_rank_genes, layer=args.layer_rank_genes, method=args.method_rank_genes,corr_method = args.corr_method_rank_genes) L.info("Plotting rank genes group") - sc.pl.rank_genes_groups(adata_sc, show = False, save = ".png") - markers_df = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"]).iloc[0:int(args.n_genes_rank), :] + sc.pl.rank_genes_groups(sdata_sc["table"], show = False, save = ".png") + markers_df = pd.DataFrame(sdata_sc["table"].uns["rank_genes_groups"]["names"]).iloc[0:int(args.n_genes_rank), :] L.info("Saving rank genes to " + output_dir + "/rank_genes_groups.csv") markers_df.to_csv(output_dir + "/rank_genes_groups.csv") markers = list(np.unique(markers_df.melt().value.values)) # "Preprocess" anndatas L.info("Preprocessing AnnDatas") -tg.pp_adatas(adata_sc=adata_sc, adata_sp=adata_st, genes=markers) +tg.pp_adatas(adata_sc=sdata_sc["table"], adata_sp=sdata_st["table"], genes=markers) # 3. Run tangram L.info("Training model") adata_results = tg.mapping_utils.map_cells_to_space( - adata_sc=adata_sc, adata_sp=adata_st, num_epochs=int(args.num_epochs), device=args.device, **args.kwargs + adata_sc=sdata_sc["table"], adata_sp=sdata_st["table"], num_epochs=int(args.num_epochs), device=args.device, **args.kwargs ) # 3. 
Extract and plot results L.info("Extracting annotations") -tg.project_cell_annotations(adata_results, adata_st, annotation=args.labels_key_model) +tg.project_cell_annotations(adata_results, sdata_st["table"], annotation=args.labels_key_model) L.info("Plotting spatial embedding plot coloured by 'tangram_ct_pred'") -annotation_list = list(pd.unique(adata_sc.obs[args.labels_key_model])) -df = adata_st.obsm["tangram_ct_pred"][annotation_list] -tg.construct_obs_plot(df, adata_st, perc=0.05) -if "spatial" in adata_st.uns: - sc.pl.spatial(adata_st, color=annotation_list, cmap="viridis", show=False, frameon=False, ncols=3, save = "_tangram_ct_pred.png") +annotation_list = list(pd.unique(sdata_sc["table"].obs[args.labels_key_model])) +df = sdata_st["table"].obsm["tangram_ct_pred"][annotation_list] +tg.construct_obs_plot(df, sdata_st["table"], perc=0.05) +if "spatial" in sdata_st["table"].uns: + sc.pl.spatial(sdata_st["table"], color=annotation_list, cmap="viridis", show=False, frameon=False, ncols=3, save = "_tangram_ct_pred.png") else: - sc.pl.spatial(adata_st, color=annotation_list, cmap="viridis", show=False, frameon=False, ncols=3, save = "_tangram_ct_pred.png",spot_size=0.5) + sc.pl.spatial(sdata_st["table"], color=annotation_list, cmap="viridis", show=False, frameon=False, ncols=3, save = "_tangram_ct_pred.png",spot_size=0.5) -mdata_singlecell_results = mu.MuData({"rna": adata_sc}) -mdata_spatial_results = mu.MuData({"spatial": adata_st}) +#mdata_singlecell_results = mu.MuData({"rna": adata_sc}) +#mdata_spatial_results = mu.MuData({"spatial": adata_st}) -L.info("Saving MuDatas to '%s'" % output_dir) -mdata_singlecell_results.write(output_dir+"/Tangram_screference_output.h5mu") -mdata_spatial_results.write(output_dir+"/Tangram_spatial_output.h5mu") +L.info("Saving SpatialDatas to '%s'" % output_dir) +sdata_sc.write(output_dir+"/Tangram_screference_output.zarr") +sdata_st.write(output_dir+"/Tangram_spatial_output.zarr") L.info("Done") From 35d5ecb47c5e56c07090b48c50232c967e47598d Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 14 Jan 2025 11:00:21 +0100 Subject: [PATCH 39/57] correct data type of reference --- panpipes/python_scripts/run_cell2location.py | 46 ++++++++++---------- panpipes/python_scripts/run_tangram.py | 23 +++++----- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/panpipes/python_scripts/run_cell2location.py b/panpipes/python_scripts/run_cell2location.py index f262b491..893ca233 100644 --- a/panpipes/python_scripts/run_cell2location.py +++ b/panpipes/python_scripts/run_cell2location.py @@ -8,6 +8,7 @@ import scanpy as sc import pandas as pd import spatialdata as sd +import muon as mu import os import argparse @@ -203,10 +204,9 @@ #mdata_spatial = mu.read(args.input_spatial) #adata_st = mdata_spatial.mod['spatial'] #single-cell: -L.info("Reading in reference SpatialData from '%s'" % args.input_singlecell) -sdata_sc = sd.read_zarr(args.input_singlecell) -#mdata_singlecell = mu.read(args.input_singlecell) -#adata_sc = mdata_singlecell.mod['rna'] +L.info("Reading in reference MuData from '%s'" % args.input_singlecell) +mdata_singlecell = mu.read(args.input_singlecell) +adata_sc = mdata_singlecell.mod['rna'] @@ -220,12 +220,12 @@ reduced_gene_set = pd.read_csv(args.gene_list, header = 0) reduced_gene_set.columns = ["HVGs"] L.info("Subsetting data on gene list") - sdata_sc["table"].var["selected_gene"] = sdata_sc["table"].var.index.isin(reduced_gene_set["HVGs"]) + adata_sc.var["selected_gene"] = adata_sc.var.index.isin(reduced_gene_set["HVGs"]) 
sdata_st["table"].var["selected_gene"] = sdata_st["table"].var.index.isin(reduced_gene_set["HVGs"]) - sdata_sc["table"] = sdata_sc["table"][:, sdata_sc["table"].var["selected_gene"]] + adata_sc = adata_sc[:, adata_sc.var["selected_gene"]] sdata_st["table"] = sdata_st["table"][:, sdata_st["table"].var["selected_gene"]] # check whether all genes are present in both, spatial & reference - if set(sdata_st["table"].var.index) != set(sdata_sc["table"].var.index): + if set(sdata_st["table"].var.index) != set(adata_sc.var.index): L.error( "Not all genes of the gene list %s are present in the reference as well as in the ST data. Please provide a gene list where all genes are present in both, reference and ST.", args.gene_list) sys.exit( @@ -239,29 +239,29 @@ sdata_st["table"] = sdata_st["table"][:, ~sdata_st["table"].var["MT_gene"].values] # intersect vars of reference and spatial L.info("Intersecting vars of reference and spatial ") - shared_features = [feature for feature in sdata_st["table"].var_names if feature in sdata_sc["table"].var_names] - sdata_sc["table"] = sdata_sc["table"][:, shared_features] + shared_features = [feature for feature in sdata_st["table"].var_names if feature in adata_sc.var_names] + adata_sc = adata_sc[:, shared_features] sdata_st["table"] = sdata_st["table"][:, shared_features] # select features L.info("Selecting features using 'cell2location.utils.filtering.filter_genes() function'") - selected = cell2loc_filter_genes(sdata_sc["table"], figdir + "/gene_filter.png", cell_count_cutoff=float(args.cell_count_cutoff), + selected = cell2loc_filter_genes(adata_sc, figdir + "/gene_filter.png", cell_count_cutoff=float(args.cell_count_cutoff), cell_percentage_cutoff2=float(args.cell_percentage_cutoff2), nonz_mean_cutoff=float(args.nonz_mean_cutoff)) L.info("Subsetting data on selected features") - sdata_sc["table"] = sdata_sc["table"][:, selected] + adata_sc = adata_sc[:, selected] sdata_st["table"] = sdata_st["table"][:, selected] # 3. 
Fit regression model L.info("Setting up AnnData for the reference model") -c2l.models.RegressionModel.setup_anndata(adata=sdata_sc["table"], +c2l.models.RegressionModel.setup_anndata(adata=adata_sc, labels_key = args.labels_key_reference, layer= args.layer_reference, batch_key= args.batch_key_reference, categorical_covariate_keys = categorical_covariate_keys_reference, continuous_covariate_keys = continuous_covariate_keys_reference) -model_ref = c2l.models.RegressionModel(sdata_sc["table"]) +model_ref = c2l.models.RegressionModel(adata_sc) L.info("Training the reference model") model_ref.train(max_epochs=max_epochs_reference, use_gpu = use_gpu_reference) @@ -271,12 +271,12 @@ # export results L.info("Extracting the posterior of the reference model") -sdata_sc["table"] = model_ref.export_posterior(sdata_sc["table"]) -if "means_per_cluster_mu_fg" in sdata_sc["table"].varm.keys(): - inf_aver = sdata_sc["table"].varm["means_per_cluster_mu_fg"][[f"means_per_cluster_mu_fg_{i}" for i in sdata_sc["table"].uns["mod"]["factor_names"]]].copy() +adata_sc = model_ref.export_posterior(adata_sc) +if "means_per_cluster_mu_fg" in adata_sc.varm.keys(): + inf_aver = adata_sc.varm["means_per_cluster_mu_fg"][[f"means_per_cluster_mu_fg_{i}" for i in adata_sc.uns["mod"]["factor_names"]]].copy() else: - inf_aver = sdata_sc["table"].var[[f"means_per_cluster_mu_fg_{i}" for i in sdata_sc["table"].uns["mod"]["factor_names"]]].copy() -inf_aver.columns = sdata_sc["table"].uns["mod"]["factor_names"] + inf_aver = adata_sc.var[[f"means_per_cluster_mu_fg_{i}" for i in adata_sc.uns["mod"]["factor_names"]]].copy() +inf_aver.columns = adata_sc.uns["mod"]["factor_names"] inf_aver.to_csv(output_dir+"/Cell2Loc_inf_aver.csv") # plot QC @@ -284,10 +284,10 @@ cell2loc_plot_QC_reference(model_ref, figdir + "/QC_reference_reconstruction_accuracy.png", figdir + "/QC_reference_expression signatures_vs_avg_expression.png") # save model -if sdata_sc["table"].var.index.names[0] in sdata_sc["table"].var.columns: - sdata_sc["table"].var.index.names = [None] -#mdata_singlecell.mod["rna"] = adata_sc -#mdata_singlecell.update() +if adata_sc.var.index.names[0] in adata_sc.var.columns: + adata_sc.var.index.names = [None] +mdata_singlecell.mod["rna"] = adata_sc +mdata_singlecell.update() if save_models is True: L.info("Saving reference model to '%s'" % output_dir) model_ref.save(output_dir +"/Reference_model", overwrite=True) @@ -339,7 +339,7 @@ #6. save mudatas L.info("Saving SpatialDatas to '%s'" % output_dir) -sdata_sc.write(output_dir+"/Cell2Loc_screference_output.zarr") +mdata_singlecell.write(output_dir+"/Cell2Loc_screference_output.h5mu") sdata_st.write(output_dir+"/Cell2Loc_spatial_output.zarr") diff --git a/panpipes/python_scripts/run_tangram.py b/panpipes/python_scripts/run_tangram.py index 6cbe68e7..28eaeb0d 100644 --- a/panpipes/python_scripts/run_tangram.py +++ b/panpipes/python_scripts/run_tangram.py @@ -107,9 +107,8 @@ #adata_st = mdata_spatial.mod['spatial'] #single-cell: L.info("Reading in reference SpatialData from '%s'" % args.input_singlecell) -sdata_sc = sd.read_zarr(args.input_singlecell) -#mdata_singlecell = mu.read(args.input_singlecell) -#adata_sc = mdata_singlecell.mod['rna'] +mdata_singlecell = mu.read(args.input_singlecell) +adata_sc = mdata_singlecell.mod['rna'] #2. 
Perform gene selection: @@ -124,22 +123,22 @@ else: # perform feature selection using sc.tl.rank_genes_groups() L.info("Running 'scanpy.tl.rank_genes_groups()'") - sc.tl.rank_genes_groups(sdata_sc["table"], groupby=args.labels_key_rank_genes, layer=args.layer_rank_genes, method=args.method_rank_genes,corr_method = args.corr_method_rank_genes) + sc.tl.rank_genes_groups(adata_sc, groupby=args.labels_key_rank_genes, layer=args.layer_rank_genes, method=args.method_rank_genes,corr_method = args.corr_method_rank_genes) L.info("Plotting rank genes group") - sc.pl.rank_genes_groups(sdata_sc["table"], show = False, save = ".png") - markers_df = pd.DataFrame(sdata_sc["table"].uns["rank_genes_groups"]["names"]).iloc[0:int(args.n_genes_rank), :] + sc.pl.rank_genes_groups(adata_sc, show = False, save = ".png") + markers_df = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"]).iloc[0:int(args.n_genes_rank), :] L.info("Saving rank genes to " + output_dir + "/rank_genes_groups.csv") markers_df.to_csv(output_dir + "/rank_genes_groups.csv") markers = list(np.unique(markers_df.melt().value.values)) # "Preprocess" anndatas L.info("Preprocessing AnnDatas") -tg.pp_adatas(adata_sc=sdata_sc["table"], adata_sp=sdata_st["table"], genes=markers) +tg.pp_adatas(adata_sc=adata_sc, adata_sp=sdata_st["table"], genes=markers) # 3. Run tangram L.info("Training model") adata_results = tg.mapping_utils.map_cells_to_space( - adata_sc=sdata_sc["table"], adata_sp=sdata_st["table"], num_epochs=int(args.num_epochs), device=args.device, **args.kwargs + adata_sc=adata_sc, adata_sp=sdata_st["table"], num_epochs=int(args.num_epochs), device=args.device, **args.kwargs ) # 3. Extract and plot results @@ -147,7 +146,7 @@ tg.project_cell_annotations(adata_results, sdata_st["table"], annotation=args.labels_key_model) L.info("Plotting spatial embedding plot coloured by 'tangram_ct_pred'") -annotation_list = list(pd.unique(sdata_sc["table"].obs[args.labels_key_model])) +annotation_list = list(pd.unique(adata_sc.obs[args.labels_key_model])) df = sdata_st["table"].obsm["tangram_ct_pred"][annotation_list] tg.construct_obs_plot(df, sdata_st["table"], perc=0.05) if "spatial" in sdata_st["table"].uns: @@ -156,11 +155,11 @@ sc.pl.spatial(sdata_st["table"], color=annotation_list, cmap="viridis", show=False, frameon=False, ncols=3, save = "_tangram_ct_pred.png",spot_size=0.5) -#mdata_singlecell_results = mu.MuData({"rna": adata_sc}) +mdata_singlecell_results = mu.MuData({"rna": adata_sc}) #mdata_spatial_results = mu.MuData({"spatial": adata_st}) -L.info("Saving SpatialDatas to '%s'" % output_dir) -sdata_sc.write(output_dir+"/Tangram_screference_output.zarr") +L.info("Saving SpatialData and MuData to '%s'" % output_dir) +mdata_singlecell_results.write(output_dir+"/Tangram_screference_output.h5mu") sdata_st.write(output_dir+"/Tangram_spatial_output.zarr") L.info("Done") From 358da951ccc97fd98dabd09d223643c18ad25b81 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 14 Jan 2025 11:51:14 +0100 Subject: [PATCH 40/57] adjust to spatialData --- .github/workflows/spatial_deconvolution-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spatial_deconvolution-ci.yml b/.github/workflows/spatial_deconvolution-ci.yml index b22e40da..ac964e32 100644 --- a/.github/workflows/spatial_deconvolution-ci.yml +++ b/.github/workflows/spatial_deconvolution-ci.yml @@ -60,7 +60,9 @@ jobs: cd deconvolution/data curl -L -o Human_Heart_reference.h5mu https://figshare.com/ndownloader/files/44969677 cd spatial_data - curl -L -o 
Human_Heart.h5mu https://figshare.com/ndownloader/files/44969488 + curl -L -o Human_Heart.zarr.zip https://figshare.com/ndownloader/files/51667673 + unzip Human_Heart.zarr.zip + rm Human_Heart.zarr.zip # Note: we run the following to test that the commands works From 857d043416fb4f83a0d9af2412edfee3d4b9af80 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 20 Jan 2025 14:41:58 +0100 Subject: [PATCH 41/57] add tangram github action --- ...patial_deconvolution_cell2location-ci.yml} | 4 +- .../spatial_deconvolution_tangram-ci.yml | 98 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) rename .github/workflows/{spatial_deconvolution-ci.yml => spatial_deconvolution_cell2location-ci.yml} (96%) create mode 100644 .github/workflows/spatial_deconvolution_tangram-ci.yml diff --git a/.github/workflows/spatial_deconvolution-ci.yml b/.github/workflows/spatial_deconvolution_cell2location-ci.yml similarity index 96% rename from .github/workflows/spatial_deconvolution-ci.yml rename to .github/workflows/spatial_deconvolution_cell2location-ci.yml index ac964e32..b43215bb 100644 --- a/.github/workflows/spatial_deconvolution-ci.yml +++ b/.github/workflows/spatial_deconvolution_cell2location-ci.yml @@ -1,4 +1,4 @@ -name: Run tutorials (spatial deconvolution) +name: Run tutorials (spatial deconvolution cell2location) on: push: @@ -12,7 +12,7 @@ env: debug: 'true' jobs: - spatial_deconvolution: + spatial_deconvolution_cell2location: runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/.github/workflows/spatial_deconvolution_tangram-ci.yml b/.github/workflows/spatial_deconvolution_tangram-ci.yml new file mode 100644 index 00000000..c06d8b75 --- /dev/null +++ b/.github/workflows/spatial_deconvolution_tangram-ci.yml @@ -0,0 +1,98 @@ +name: Run tutorials (spatial deconvolution tangram) + +on: + push: + branches: + - main + pull_request: + branches: + - main + +env: + debug: 'true' + +jobs: + spatial_deconvolution_tangram: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: File tree + if: env.debug == 'true' + run: tree + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + auto-activate-base: true + auto-update-conda: true + channels: conda-forge + channel-priority: strict + activate-environment: pipeline_env + environment-file: pipeline_env.yaml + + - name: Install Panpipes + shell: bash -el {0} + run: | + pip install .[spatial] + conda list + + - name: Conda info + if: env.debug == 'true' + shell: bash -el {0} + run: conda info + + - name: Conda list + if: env.debug == 'true' + shell: pwsh + run: conda list + + + - name: Preparing the data + run: | + mkdir deconvolution_tangram deconvolution_tangram/data deconvolution_tangram/data/spatial_data + cd deconvolution_tangram/data + curl -L -o Human_Heart_reference.h5mu https://figshare.com/ndownloader/files/44969677 + cd spatial_data + curl -L -o Human_Heart.zarr.zip https://figshare.com/ndownloader/files/51667673 + unzip Human_Heart.zarr.zip + rm Human_Heart.zarr.zip + + + # Note: we run the following to test that the commands works + - name: Preparing the configuration file + shell: bash -el {0} + run: | + cd deconvolution_tangram + panpipes deconvolution_spatial config + + - name: Edit the submission file + run: | + cd deconvolution_tangram + curl -o pipeline.yml 
https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/deconvolution_tangram/pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree deconvolution_tangram + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd deconvolution_tangram + panpipes deconvolution_spatial show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd deconvolution_tangram + panpipes deconvolution_spatial make full --local + + - name: File tree + if: env.debug == 'true' + run: tree deconvolution_tangram From 573fcc280a050adb058bb93d04f873a475521491 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 28 Jan 2025 10:50:24 +0100 Subject: [PATCH 42/57] pin spatialdata version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b888fba6..ec9b794b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,8 +69,8 @@ spatial = [ "squidpy", "cell2location", "tangram-sc", - "spatialdata", - "spatialdata-io" + "spatialdata==0.2.6", + "spatialdata-io==0.1.6" ] refmap_old = [ From e4564c57ac491c7313437837514704fa9a5c975f Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 28 Jan 2025 10:58:16 +0100 Subject: [PATCH 43/57] pin dask --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ec9b794b..02285df5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,8 @@ spatial = [ "cell2location", "tangram-sc", "spatialdata==0.2.6", - "spatialdata-io==0.1.6" + "spatialdata-io==0.1.6", + "dask==2024.12.1" ] refmap_old = [ From a72fbcb63194ab607c14cf5ce552f24beb88a50a Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 28 Jan 2025 11:47:47 +0100 Subject: [PATCH 44/57] remove outfile_spatial --- panpipes/panpipes/pipeline_deconvolution_spatial.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/panpipes/panpipes/pipeline_deconvolution_spatial.py b/panpipes/panpipes/pipeline_deconvolution_spatial.py index e7026dc4..eb970434 100644 --- a/panpipes/panpipes/pipeline_deconvolution_spatial.py +++ b/panpipes/panpipes/pipeline_deconvolution_spatial.py @@ -35,8 +35,7 @@ def gen_filter_jobs(): for input_spatial in input_paths_spatial: sample_prefix = os.path.basename(input_spatial) sample_prefix = sample_prefix.replace(".zarr","") - outfile_spatial = "cell2location.output/" + sample_prefix + "/Cell2Loc_spatial_output.zarr" - yield input_spatial, outfile_spatial, sample_prefix, input_singlecell + yield input_spatial, sample_prefix, input_singlecell @mkdir("logs") @@ -45,7 +44,7 @@ def gen_filter_jobs(): @mkdir("figures/Cell2Location") @mkdir("cell2location.output") @files(gen_filter_jobs) -def run_cell2location(input_spatial, outfile_spatial, sample_prefix, input_singlecell): +def run_cell2location(input_spatial, sample_prefix, input_singlecell): figdir = "./figures/Cell2Location/" + sample_prefix output_dir = "./cell2location.output/" + sample_prefix @@ -103,6 +102,8 @@ def run_cell2location(input_spatial, outfile_spatial, sample_prefix, input_singl if PARAMS['Cell2Location_save_models'] is not None: cmd += " --save_models %(Cell2Location_save_models)s" + if PARAMS['Cell2Location_export_gene_by_spot'] is not None: + cmd += " --export_gene_by_spot %(Cell2Location_export_gene_by_spot)s" cmd += " > logs/%(log_file)s " job_kwargs["job_threads"] = PARAMS['resources_threads_low'] @@ -116,7 +117,7 @@ def run_cell2location(input_spatial, outfile_spatial, sample_prefix, input_singl 
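With `outfile_spatial` removed from the generator, each deconvolution job is now described only by the `.zarr` slide path, the sample prefix derived from it, and the shared single-cell reference. A minimal sketch of that derivation, using placeholder paths rather than the pipeline's configured input directory:

```python
import os

# placeholder inputs; the workflow resolves these from its own configuration
input_paths_spatial = ["data/spatial_data/Human_Heart.zarr"]
input_singlecell = "data/Human_Heart_reference.h5mu"

for input_spatial in input_paths_spatial:
    # the sample prefix is the .zarr basename with its suffix stripped
    sample_prefix = os.path.basename(input_spatial).replace(".zarr", "")
    print(input_spatial, sample_prefix, input_singlecell)
```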
@mkdir("figures/Tangram") @mkdir("tangram.output") @files(gen_filter_jobs) -def run_tangram(input_spatial, outfile_spatial, sample_prefix, input_singlecell): +def run_tangram(input_spatial, sample_prefix, input_singlecell): figdir = "./figures/Tangram/" + sample_prefix output_dir = "./tangram.output/" + sample_prefix From 86fd9d49a2bad0e63a855c90ecc73bded2076d99 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 28 Jan 2025 11:48:30 +0100 Subject: [PATCH 45/57] add option to export gene by spot matrix --- .../pipeline_deconvolution_spatial/pipeline.yml | 1 + panpipes/python_scripts/run_cell2location.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/panpipes/panpipes/pipeline_deconvolution_spatial/pipeline.yml b/panpipes/panpipes/pipeline_deconvolution_spatial/pipeline.yml index 66d524f0..b9d6d430 100644 --- a/panpipes/panpipes/pipeline_deconvolution_spatial/pipeline.yml +++ b/panpipes/panpipes/pipeline_deconvolution_spatial/pipeline.yml @@ -89,6 +89,7 @@ Cell2Location: # ------------------------------- save_models: False # Default False; whether to save the reference and spatial mapping models + export_gene_by_spot: False # Default False; whether to save a gene by spot matrix for each cell type in a layer diff --git a/panpipes/python_scripts/run_cell2location.py b/panpipes/python_scripts/run_cell2location.py index 893ca233..7cb427d8 100644 --- a/panpipes/python_scripts/run_cell2location.py +++ b/panpipes/python_scripts/run_cell2location.py @@ -49,6 +49,9 @@ parser.add_argument("--save_models", default=False, help="whether to save the reference & spatial mapping models") +parser.add_argument("--export_gene_by_spot", + default=False, + help="whether to save a gene by spot matrix for each cell type in a layer") # parameters for feature selection: @@ -148,6 +151,11 @@ save_models = False else: save_models = True + +if (args.export_gene_by_spot is False) or (args.export_gene_by_spot == "False"): + export_gene_by_spot = False +else: + export_gene_by_spot = True if (args.remove_mt is True) or (args.remove_mt == "True"): remove_mt = True @@ -320,6 +328,14 @@ L.info("Plotting QC plots") cell2loc_plot_QC_reconstr(model_spatial, figdir + "/QC_spatial_reconstruction_accuracy.png") +# export a gene by spot matrix for each cell type +if export_gene_by_spot: + # Compute expected expression per cell type + expected_dict = model_spatial.module.model.compute_expected_per_cell_type(model_spatial.samples["post_sample_q05"], model_spatial.adata_manager) + # Add to anndata layers + for i, n in enumerate(model_spatial.factor_names_): + sdata_st["table"].layers[n] = expected_dict['mu'][i] + #plot output L.info("Plotting spatial embedding plot coloured by 'q05_cell_abundance_w_sf'") From 7e8bcf1b9639d7295244d6c68b0f406e7a5e49a4 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 4 Feb 2025 15:08:40 +0100 Subject: [PATCH 46/57] adjust docs to SpatialData --- docs/workflows/clustering_spatial.md | 2 +- docs/workflows/deconvolute_spatial.md | 7 ++++--- docs/workflows/ingest_spatial.md | 10 +++++----- docs/workflows/preprocess_spatial.md | 8 ++++---- docs/yaml_docs/spatial_deconvolution.md | 8 +++++--- docs/yaml_docs/spatial_preprocess.md | 10 +++++----- docs/yaml_docs/spatial_qc.md | 4 ++-- 7 files changed, 26 insertions(+), 23 deletions(-) diff --git a/docs/workflows/clustering_spatial.md b/docs/workflows/clustering_spatial.md index 3c63b62e..99ae8b49 100644 --- a/docs/workflows/clustering_spatial.md +++ b/docs/workflows/clustering_spatial.md @@ -1,6 +1,6 @@ # Clustering spatial data 
The `clustering` workflow accepts both cell suspension datasets and spatial transcriptomics data as input that have been ingested with the `qc_spatial` workflow and optionally filtered with the `spatial_preprocess` workflow. -The workflow expects a **single `MuData` object** with the spatial data saved in `mdata.mod["spatial"]`. +The workflow expects a **single `SpatialData` object**. Set `spatial: True` in the configuration file and customize the spatial modality clustering parameters exactly as you would for a single cell experiment. For more information check the [clustering workflow](./clustering.md) diff --git a/docs/workflows/deconvolute_spatial.md b/docs/workflows/deconvolute_spatial.md index e1233dba..d03f56b9 100644 --- a/docs/workflows/deconvolute_spatial.md +++ b/docs/workflows/deconvolute_spatial.md @@ -1,6 +1,6 @@ # Deconvoluting spatial data -With the `deconvolution_spatial` workflow, one or multiple spatial slides can be deconvoluted in one run. For that, a `MuData` object for each slide is expected, with the spatial data saved in `mdata.mod["spatial"]`. The spatial slides are deconvoluted using the same reference. For the reference, one `MuData` with the gene expression data saved in `mdata.mod["rna"]` is expected as input. +With the `deconvolution_spatial` workflow, one or multiple spatial slides can be deconvoluted in one run. For that, a `SpatialData` object for each slide is expected. The spatial slides are deconvoluted using the same reference. For the reference, one `MuData` with the gene expression data saved in `mdata.mod["rna"]` is expected as input. The workflow provides the possibility to run deconvolution using `Cell2Location` and `Tangram`. @@ -19,8 +19,9 @@ For the reference and each spatial slide the following steps are run. **Note, th - Regression/reference model is fitted and a plot of the training history as well as QC plots are saved in the `./figures/Cell2Location` directory. Additionally, a csv-file `Cell2Loc_inf_anver.csv` with the estimated expression of every gene in every cell type is saved in `./cell2location.output`. - (Optional) Reference model is saved in `./cell2location.output` - Spatial mapping model is fitted. Training history and QC plots are saved in the `./figures/Cell2Location` directory. Plot of the spatial embedding coloured by `q05_cell_abundance_w_sf` is also saved in `./figures/Cell2Location`. +- (Optional) A gene by spot matrix for each cell type is saved to a layer in the table of the `SpatialData` object - (Optional) Spatial mapping model is saved in `./cell2location.output` -- `MuData` objects of the spatial slide and the reference are saved in `./cell2location.output`. The `MuData` object of the spatial slide contains the estimated cell type abundances. +- The `SpatialData` object of the spatial slide and the `MuData` object of the reference are saved in `./cell2location.output`. The `SpatialData` object of the spatial slide contains the estimated cell type abundances. ### Tangram @@ -34,7 +35,7 @@ For the reference and each spatial slide the following steps are run. 
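Under the new layout each slide is read from a `SpatialData` zarr store while the reference stays a `MuData` file; a minimal loading sketch with placeholder file names:

```python
import muon as mu
import spatialdata as sd

sdata_st = sd.read_zarr("spatial_data/Human_Heart.zarr")  # one spatial slide
mdata_sc = mu.read("Human_Heart_reference.h5mu")          # shared single-cell reference

adata_st = sdata_st["table"]  # spot-by-gene AnnData used for deconvolution
adata_sc = mdata_sc["rna"]    # gene expression modality of the reference
print(adata_st.shape, adata_sc.shape)
```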
**Note, th - Data is preprocessed with [tangram.pp_adatas](https://tangram-sc.readthedocs.io/en/latest/classes/tangram.mapping_utils.pp_adatas.html) - Tangram model is fitted with [tangram.mapping_utils.map_cells_to_space](https://tangram-sc.readthedocs.io/en/latest/classes/tangram.mapping_utils.map_cells_to_space.html) and annotations are transfered from single-cell data onto space with [tangram.project_cell_annotations](https://tangram-sc.readthedocs.io/en/latest/classes/tangram.utils.project_cell_annotations.html) - Plot of the spatial embedding coloured by `tangram_ct_pred` is saved in `./figures/Tangram` -- `MuData` objects of the spatial slide and the reference are saved in `./tangram.output`. The `MuData` object of the spatial slide contains the deconvolution predictions. +- The `SpatialData` object of the spatial slide and the `MuData` object of the reference are saved in `./tangram.output`. The `SpatialData` object of the spatial slide contains the deconvolution predictions. diff --git a/docs/workflows/ingest_spatial.md b/docs/workflows/ingest_spatial.md index a9992d6d..f6ded09a 100644 --- a/docs/workflows/ingest_spatial.md +++ b/docs/workflows/ingest_spatial.md @@ -1,19 +1,19 @@ # Ingesting spatial data -Similar to the cell suspension workflow, `spatial_qc` ingests `Vizgen` and/or `Visium` data and saves the data into `MuData` objects. -A primary difference to the cell suspension `ingestion` workflow is that we are not concatenating the input data into a single matrix, but keeping the samples as separate `MuData` objects, each with a `spatial` layer. This ensures that the processing does not introduce any technical batch effect when tissue slides are very different in cell composition. In a future release, we will use [SpatialData](https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks.html) as a data format and framework to process multi-slides experiments. +The `spatial_qc` workflow ingests `Vizgen`, `Visium`, or `Xenium` data and saves the data into `SpatialData` objects. +A primary difference to the cell suspension `ingestion` workflow is that we are not concatenating the input data into a single matrix, but keeping the samples as separate `SpatialData` objects. This ensures that the processing does not introduce any technical batch effect when tissue slides are very different in cell composition. ## Steps -- Data is ingested into `MuData` objects with the modality `spatial`. The workflow generates one MuData per dataset. - - Raw `MuData` objects are saved into `./tmp` +- Data is ingested into `SpatialData` objects. The workflow generates one `SpatialData` per dataset. + - `SpatialData` objects of the raw data are saved into `./tmp` as `zarr` files - QC metrics are computed using `scanpy` functionalities: - Basic QC metrics are computed using `sc.pp.calculate_qc_metrics` - (Optional) Compute cell-cycle scores using `sc.tl.score_genes_cell_cycle`. For that, the [default gene list](../../panpipes/resources/cell_cycle_genes.tsv) can be used or a path to a tsv file can be specified. - (Optional) Custom genes actions. [Default gene list](../../panpipes/resources/qc_genelist_1.0.csv) can be used or a path to a csv file can be specified. - Calculate proportions of gene groups, e.g. 
mitochondrial genes - Score genes using `sc.tl.score_genes` - - `MuData` objects with calculated QC metrics are saved in `qc.data` + - `SpatialData` objects with calculated QC metrics are saved in `qc.data` - Metadata (`.obs`) is saved into the current directory as tsv files - Specified QC metrics are plotted in violin and spatial embedding plots - For `Vizgen` data, additional histograms are plotted diff --git a/docs/workflows/preprocess_spatial.md b/docs/workflows/preprocess_spatial.md index 925bd534..3bfae011 100644 --- a/docs/workflows/preprocess_spatial.md +++ b/docs/workflows/preprocess_spatial.md @@ -1,17 +1,17 @@ # Preprocessing spatial data -The `preprocess_spatial` workflow filters the data and preprocesses the data by normalization, HVG selection, and PCA computation. Multiple `MuData` objects of the same assay (`Visium` or `Vizgen`), each with a `spatial` modality, can be filtered and preprocessed in one run. +The `preprocess_spatial` workflow filters the data and preprocesses the data by normalization, HVG selection, and PCA computation. Multiple `SpatialData` objects of the same assay (`Visium`, `Vizgen`, or `Xenium`) can be filtered and preprocessed in one run. ## Steps -If multiple `MuData` objects are provided, the following steps are run for each **with the same parameter setting.** +If multiple `SpatialData` objects are provided, the following steps are run for each **with the same parameter setting.** -- `MuData` object is filtered by the specified thresholds in the pipeline.yml. Note, that the filtering step is **optional**. You can avoid filtering by setting the `run` parameter in the pipeline.yml under `filtering` to `False`. +- `SpatialData` object is filtered by the specified thresholds in the pipeline.yml. Note, that the filtering step is **optional**. You can avoid filtering by setting the `run` parameter in the pipeline.yml under `filtering` to `False`. - Post-filter plotting is performed (only when data was filtered, i.e. `run: True`). Specified metrics in the pipeline.yml are plotted in violin and spatial embedding plots. Plots are saved into the `./figures/spatial` directory. - Data is normalized and HVGs are selected. Before normalization, raw counts are saved into `.layers["raw_counts"]`, if not present already. Normalized counts are saved into `.X` and `.layers["lognorm"]` or `.layers["norm_pearson_resid"]`, depending on the chosen normalization. HVGs are saved into `.var["highly_variable"]`. - PCA is computed and plotted. PCA plots are also saved into the `./figures/spatial` directory. -- Final `MuData` object is saved into the `./filtered.data` directory +- Final `SpatialData` object is saved into the `./filtered.data` directory as a `zarr` file ## Steps to run diff --git a/docs/yaml_docs/spatial_deconvolution.md b/docs/yaml_docs/spatial_deconvolution.md index 29a4e93a..a1debae4 100644 --- a/docs/yaml_docs/spatial_deconvolution.md +++ b/docs/yaml_docs/spatial_deconvolution.md @@ -33,7 +33,7 @@ Specified by the following three parameters: - threads_medium `Integer`, Default: 1
Number of threads used for medium intensity computing tasks. - For each thread, there must be enough memory to load your mudata and do computationally light tasks. + For each thread, there must be enough memory to load your SpatialData and do computationally light tasks. - threads_low `Integer`, Default: 1
Number of threads used for low intensity computing tasks. @@ -46,12 +46,12 @@ Specified by the following three parameters: ## 1. Input Options -With the `deconvolution_spatial` workflow, one or multiple spatial slides can be deconvoluted in one run. For that, a `MuData` object for each slide is expected, with the spatial data saved in `mdata.mod["spatial"]`. The spatial slides are deconvoluted **using the same reference**. For the reference, one `MuData` with the gene expression data saved in `mdata.mod["rna"]` is expected as input. Please note, that the same parameter setting is used for each slide.
For the **spatial** input, the workflow, therefore, reads in **all `.h5mu` objects of a directory** (see below). **The spatial and single-cell data thus need to be saved in different folders.** +With the `deconvolution_spatial` workflow, one or multiple spatial slides can be deconvoluted in one run. For that, a `SpatialData` object for each slide is expected. The spatial slides are deconvoluted **using the same reference**. For the reference, one `MuData` with the gene expression data saved in `mdata.mod["rna"]` is expected as input. Please note, that the same parameter setting is used for each slide.
For the **spatial** input, the workflow, therefore, reads in **all `.zarr` objects of a directory** (see below).
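Because every `.zarr` store found in the spatial input folder is treated as one slide to deconvolute against the single reference, input discovery amounts to a directory scan, roughly as sketched below with placeholder paths:

```python
import glob
import os

spatial_dir = "data/spatial_data"        # placeholder folder holding the slides
singlecell_file = "data/reference.h5mu"  # placeholder reference MuData

slides = sorted(glob.glob(os.path.join(spatial_dir, "*.zarr")))
for slide in slides:
    print("deconvolute", os.path.basename(slide), "against", singlecell_file)
```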
 input
- spatial `String`, Mandatory parameter
- Path to folder containing one or multiple `MuDatas` of spatial data. The pipeline is reading in all `MuData` files in that folder and assuming that they are `MuDatas` of spatial slides. + Path to folder containing one or multiple `SpatialDatas` of spatial data. The pipeline is reading in all `SpatialData` files in that folder. - singlecell `String`, Mandatory parameter
Path to the MuData **file** (not folder) of the reference single-cell data. @@ -151,6 +151,8 @@ You can specify whether both models (spatial and reference) should be saved with save_models, Default: False
Whether to save the reference & spatial mapping models. +export_gene_by_spot, Default: False
+ Whether to save a gene by spot matrix for each cell type in a layer. ## 3. Tangram Options diff --git a/docs/yaml_docs/spatial_preprocess.md b/docs/yaml_docs/spatial_preprocess.md index 28270c99..2d0db62c 100644 --- a/docs/yaml_docs/spatial_preprocess.md +++ b/docs/yaml_docs/spatial_preprocess.md @@ -35,7 +35,7 @@ Specified by the following three parameters: - threads_medium `Integer`, Default: 1
Number of threads used for medium intensity computing tasks. - For each thread, there must be enough memory to load your mudata and do computationally light tasks. + For each thread, there must be enough memory to load your SpatialData and do computationally light tasks. - threads_low `Integer`, Default: 1
Number of threads used for low intensity computing tasks. @@ -48,14 +48,14 @@ Specified by the following three parameters: ## 1. Input Options -With the preprocess_spatial workflow, one or multiple `MuData` objects can be preprocessed in one run. The workflow **reads in all `.h5mu` objects of a directory**. The `MuData` objects in the directory need to be of the same assay (vizgen or visium). The workflow then runs the preprocessing of each `MuData` object separately with the same parameters that are specified in the yaml file. +With the preprocess_spatial workflow, one or multiple `SpatialData` objects can be preprocessed in one run. The workflow **reads in all `.zarr` objects of a directory**. The `SpatialData` objects in the directory need to be of the same assay (Vizgen, Visium, or Xenium). The workflow then runs the preprocessing of each `SpatialData` object separately with the same parameters that are specified in the yaml file.
input_dir `String`, Mandatory parameter
- Path to the folder containing all input `h5mu` files. + Path to the folder containing all input `zarr` files. assay [`'visium'`, `'vizgen'`], Default: `'visium'`
- Spatial transcriptomics assay of the `h5mu` files in `input_dir`. + Spatial transcriptomics assay of the `zarr` files in `input_dir`. @@ -70,7 +70,7 @@ With the preprocess_spatial workflow, one or multiple `MuData` objects can be pr
-With the parameters below you can specify thresholds for filtering. The filtering is fully customisable to any columns in `.obs` or `.var`. You are not restricted by the columns given as default. When specifying a column name, please make sure it exactly matches the column name in the h5mu object.
Please slso make sure, that the specified metrics are present in all `h5mu` objects of the `input_dir`, i.e. the `MuData` objects for that the preprocessing is run. +With the parameters below you can specify thresholds for filtering. The filtering is fully customisable to any columns in `.obs` or `.var`. You are not restricted by the columns given as default. When specifying a column name, please make sure it exactly matches the column name in the table of the `SpatialData` object.
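Since the thresholds map directly onto `.obs` and `.var` columns of the table, a filter can be pictured as a pair of boolean masks; a sketch with placeholder column names and cut-offs (any QC column present in the table works the same way):

```python
import spatialdata as sd

sdata = sd.read_zarr("qc.data/Human_Heart_unfilt.zarr")  # placeholder store
table = sdata["table"]

# hypothetical thresholds on standard scanpy QC columns
keep_obs = (table.obs["total_counts"] > 500) & (table.obs["pct_counts_mt"] < 20)
keep_var = table.var["n_cells_by_counts"] > 3

filtered = table[keep_obs, :][:, keep_var].copy()
print(f"kept {filtered.n_obs}/{table.n_obs} spots and {filtered.n_vars}/{table.n_vars} genes")
```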
Please also make sure, that the specified metrics are present in all `SpatialData` objects of the `input_dir`, i.e. the `SpatialData` objects for that the preprocessing is run. --- diff --git a/docs/yaml_docs/spatial_qc.md b/docs/yaml_docs/spatial_qc.md index 5a742ab0..baeb2850 100644 --- a/docs/yaml_docs/spatial_qc.md +++ b/docs/yaml_docs/spatial_qc.md @@ -32,11 +32,11 @@ Computing resources to use, specifically the number of threads used for parallel Specified by the following three parameters: - threads_high `Integer`, Default: 1
Number of threads used for high intensity computing tasks. - For each thread, there must be enough memory to load all your input files at once and create the MuData object. + For each thread, there must be enough memory to load all your input files at once and create the SpatialData object. - threads_medium `Integer`, Default: 1
Number of threads used for medium intensity computing tasks. - For each thread, there must be enough memory to load your mudata and do computationally light tasks. + For each thread, there must be enough memory to load your SpatialData and do computationally light tasks. - threads_low `Integer`, Default: 1
Number of threads used for low intensity computing tasks. From 463ba6f1f13e3633b270ec0df5fbde828d4659a5 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Tue, 4 Feb 2025 15:13:49 +0100 Subject: [PATCH 47/57] change to SpatialData --- panpipes/python_scripts/collate_mdata.py | 113 ++++++++++++------ panpipes/python_scripts/plot_cluster_umaps.py | 26 ++-- .../python_scripts/plot_scanpy_markers.py | 28 +++-- .../rerun_find_neighbors_for_clustering.py | 90 +++++++++----- panpipes/python_scripts/run_clustering.py | 21 ++-- .../python_scripts/run_find_markers_multi.py | 29 +++-- panpipes/python_scripts/run_umap.py | 21 ++-- 7 files changed, 216 insertions(+), 112 deletions(-) diff --git a/panpipes/python_scripts/collate_mdata.py b/panpipes/python_scripts/collate_mdata.py index cf500bcc..3134accc 100644 --- a/panpipes/python_scripts/collate_mdata.py +++ b/panpipes/python_scripts/collate_mdata.py @@ -34,8 +34,19 @@ L.info("Running with params: %s", args) -L.info("Reading in MuData from '%s'" % args.input_mudata) -mdata = mu.read(args.input_mudata) +#L.info("Reading in MuData from '%s'" % args.input_mudata) +#mdata = mu.read(args.input_mudata) +L.info("Reading in data from '%s'" % args.input_mudata) +if ".zarr" in args.input_mudata: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.input_mudata) + mdata = sd.read_zarr(args.input_mudata) +else: + L.info("Reading in MuData from '%s'" % args.input_mudata) + mdata = mu.read(args.input_mudata) + + + L.info("Reading in cluster information") cf = pd.read_csv(args.clusters_files_csv) @@ -55,46 +66,78 @@ # add in the clusters +if isinstance(mdata, MuData): + L.info("Adding cluster information to MuData") + for i in range(cf.shape[0]): + cf_df = pd.read_csv(cf['fpath'][i], sep='\t', index_col=0) + cf_df['clusters'] = cf_df['clusters'].astype('str').astype('category') + cf_df = cf_df.rename(columns={"clusters":cf['new_key'][i]}) + + if cf['mod'][i] != "multimodal": + mdata[cf['mod'][i]].obs = mdata[cf['mod'][i]].obs.merge(cf_df, left_index=True, right_index=True) + else: + mdata.obs = mdata.obs.merge(cf_df, left_index=True, right_index=True) +elif isinstance(mdata, sd.SpatialData): + L.info("Adding cluster information to SpatialData") + for i in range(cf.shape[0]): + cf_df = pd.read_csv(cf['fpath'][i], sep='\t', index_col=0) + cf_df['clusters'] = cf_df['clusters'].astype('str').astype('category') + cf_df = cf_df.rename(columns={"clusters":cf['new_key'][i]}) + mdata["table"].obs = mdata["table"].obs.merge(cf_df, left_index=True, right_index=True) -L.info("Adding cluster information to MuData") -for i in range(cf.shape[0]): - cf_df = pd.read_csv(cf['fpath'][i], sep='\t', index_col=0) - cf_df['clusters'] = cf_df['clusters'].astype('str').astype('category') - cf_df = cf_df.rename(columns={"clusters":cf['new_key'][i]}) - - if cf['mod'][i] != "multimodal": - mdata[cf['mod'][i]].obs = mdata[cf['mod'][i]].obs.merge(cf_df, left_index=True, right_index=True) - else: - mdata.obs = mdata.obs.merge(cf_df, left_index=True, right_index=True) L.info("Adding UMAP coordinates to MuData") uf = pd.read_csv(args.umap_files_csv) -for i in range(uf.shape[0]): - uf_df = pd.read_csv(uf['fpath'][i], sep='\t', index_col=0) - mod = uf['mod'][i] - new_key = uf['new_key'][i] - if uf['mod'][i] != "multimodal": - if all(mdata[mod].obs_names == uf_df.index): - mdata[mod].obsm[new_key] = uf_df.to_numpy() +if isinstance(mdata, MuData): + for i in range(uf.shape[0]): + uf_df = pd.read_csv(uf['fpath'][i], sep='\t', index_col=0) + mod = uf['mod'][i] + new_key = 
uf['new_key'][i] + if uf['mod'][i] != "multimodal": + if all(mdata[mod].obs_names == uf_df.index): + mdata[mod].obsm[new_key] = uf_df.to_numpy() + else: + L.warn("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] ) else: - L.warn("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] ) - else: - # check the observations are the same - if set(mdata.obs_names).difference(uf_df.index) == set(): - # put the observations in the same order - uf_df = uf_df.loc[mdata.obs_names,:] - mdata.obsm[new_key] = uf_df.to_numpy() + # check the observations are the same + if set(mdata.obs_names).difference(uf_df.index) == set(): + # put the observations in the same order + uf_df = uf_df.loc[mdata.obs_names,:] + mdata.obsm[new_key] = uf_df.to_numpy() + else: + L.warning("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] ) +elif isinstance(mdata, sd.SpatialData): + for i in range(uf.shape[0]): + uf_df = pd.read_csv(uf['fpath'][i], sep='\t', index_col=0) + mod = uf['mod'][i] + new_key = uf['new_key'][i] + if uf['mod'][i] != "multimodal": + if all(mdata["table"].obs_names == uf_df.index): + mdata["table"].obsm[new_key] = uf_df.to_numpy() + else: + L.warn("Cannot integrate %s into adata as obs_names mismatch" % uf.iloc[i,:] ) else: - L.warning("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] ) - - -L.info("Saving updated MuData to '%s'" % args.output_mudata) -mdata.write(args.output_mudata) - -output_csv = re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata) -L.info("Saving metadata to '%s'" % output_csv) -mdata.obs.to_csv(output_csv, sep='\t') + # check the observations are the same + if set(mdata["table"].obs_names).difference(uf_df.index) == set(): + # put the observations in the same order + uf_df = uf_df.loc[mdata["table"].obs_names,:] + mdata["table"].obsm[new_key] = uf_df.to_numpy() + else: + L.warning("Cannot integrate %s into adata as obs_names mismatch" % uf.iloc[i,:] ) + +if isinstance(mdata, MuData): + L.info("Saving updated MuData to '%s'" % args.output_mudata) + mdata.write(args.output_mudata) + output_csv = re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata) + L.info("Saving metadata to '%s'" % output_csv) + mdata.obs.to_csv(output_csv, sep='\t') +elif isinstance(mdata, sd.SpatialData): + L.info("Saving updated SpatialData to '%s'" % args.output_mudata) + mdata.write(args.output_mudata) + output_csv = re.sub(".zarr", "_cell_metdata.tsv", args.output_mudata) + L.info("Saving metadata to '%s'" % output_csv) + mdata.obs.to_csv(output_csv, sep='\t') L.info("Done") diff --git a/panpipes/python_scripts/plot_cluster_umaps.py b/panpipes/python_scripts/plot_cluster_umaps.py index 39e73b19..18b804c3 100644 --- a/panpipes/python_scripts/plot_cluster_umaps.py +++ b/panpipes/python_scripts/plot_cluster_umaps.py @@ -90,9 +90,13 @@ def plot_spatial(adata,figdir): fig.savefig(os.path.join(figdir, ok + "_clusters.png")) - -L.info("Reading in MuData from '%s'" % args.infile) -mdata = read(args.infile) +if ".zarr" in args.infile: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + data = sd.read_zarr(args.infile) +else: + L.info("Reading in MuData from '%s'" % args.infile) + data = read(args.infile) mods = args.modalities.split(',') # detemin initial figure directory based on object type @@ -102,21 +106,27 @@ def plot_spatial(adata,figdir): if os.path.exists("multimodal/figures") is False: os.makedirs("multimodal/figures") L.info("Plotting multimodal figures") - main(mdata, 
figdir="multimodal/figures") + main(data, figdir="multimodal/figures") # we also need to plot per modality -if type(mdata) is MuData: - for mod in mdata.mod.keys(): +if type(data) is MuData: + for mod in data.mod.keys(): if mod in mods: L.info("Plotting for modality: %s" % mod) figdir = os.path.join(mod, "figures") if os.path.exists(figdir) is False: os.makedirs(figdir) if mod == "spatial": # added separate function for spatial - plot_spatial(mdata[mod], figdir) + plot_spatial(data[mod], figdir) else: - main(mdata[mod], figdir) + main(data[mod], figdir) +elif isinstance(data, sd.SpatialData): + L.info("Plotting for modality: spatial") + figdir = os.path.join("spatial", "figures") + if os.path.exists(figdir) is False: + os.makedirs(figdir) + plot_spatial(data["table"], figdir) diff --git a/panpipes/python_scripts/plot_scanpy_markers.py b/panpipes/python_scripts/plot_scanpy_markers.py index 09fd4455..c7073b6c 100644 --- a/panpipes/python_scripts/plot_scanpy_markers.py +++ b/panpipes/python_scripts/plot_scanpy_markers.py @@ -115,17 +115,23 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None): # read data -L.info("Reading in MuData from '%s'" % args.infile) -mdata = mu.read(args.infile) - -if type(mdata) is AnnData: - adata = mdata - # main function only does rank_gene_groups on X, so -elif type(mdata) is mu.MuData and args.modality is not None: - adata = mdata[args.modality] -else: - L.error("If the input is a MuData object, a modality needs to be specified") - sys.exit('If the input is a MuData object, a modality needs to be specified') +if args.modality != "spatial": + L.info("Reading in MuData from '%s'" % args.infile) + mdata = mu.read(args.infile) + + if type(mdata) is AnnData: + adata = mdata + # main function only does rank_gene_groups on X, so + elif type(mdata) is mu.MuData and args.modality is not None: + adata = mdata[args.modality] + else: + L.error("If the input is a MuData object, a modality needs to be specified") + sys.exit('If the input is a MuData object, a modality needs to be specified') +else: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + adata = sd.read_zarr(args.infile)["table"] + L.info("Loading marker information from '%s'" % args.marker_file) mf = pd.read_csv(args.marker_file, sep='\t' ) diff --git a/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py b/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py index ad675080..ba1af40a 100644 --- a/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py +++ b/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py @@ -4,6 +4,7 @@ import logging import scanpy as sc from muon import MuData, read + from panpipes.funcs.scmethods import run_neighbors_method_choice from panpipes.funcs.io import read_yaml from panpipes.funcs.scmethods import lsi @@ -37,53 +38,80 @@ sc.settings.n_jobs = int(args.n_threads) # read data -L.info("Reading in MuData from '%s'" % args.infile) -mdata = read(args.infile) +if ".zarr" in args.infile: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + sdata = sd.read_zarr(args.infile) +else: + L.info("Reading in MuData from '%s'" % args.infile) + mdata = read(args.infile) for mod in neighbor_dict.keys(): - if mod in mdata.mod.keys(): + if mod != "spatial": + if mod in mdata.mod.keys(): + if neighbor_dict[mod]['use_existing']: + L.info('Using existing neighbors graph for %s' % mod) + pass + else: + L.info("Computing new neighbors for modality %s on %s" % (mod, 
neighbor_dict[mod]['dim_red'])) + if type(mdata) is MuData: + adata=mdata[mod] + if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()): + L.info("X_pca not found, computing it using default parameters") + sc.tl.pca(adata) + if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None): + dimrem = int(neighbor_dict[mod]['dim_remove']) + adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:] + adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:] + if mod == "atac": + if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()): + L.info("X_lsi not found, computing it using default parameters") + lsi(adata=adata, num_components=50) + if neighbor_dict[mod]['dim_remove'] is not None: + L.info("Removing dimension %s from X_lsi" % neighbor_dict[mod]['dim_remove']) + dimrem = int(neighbor_dict[mod]['dim_remove']) + adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:] + adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:] + adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:] + + # run command + opts = dict(method=neighbor_dict[mod]['method'], + n_neighbors=int(neighbor_dict[mod]['k']), + n_pcs=int(neighbor_dict[mod]['n_dim_red']), + metric=neighbor_dict[mod]['metric'], + nthreads=args.n_threads, + use_rep=neighbor_dict[mod]['dim_red']) + + + run_neighbors_method_choice(adata,**opts) + mdata.mod[mod] = adata + mdata.update() + else: if neighbor_dict[mod]['use_existing']: L.info('Using existing neighbors graph for %s' % mod) pass else: L.info("Computing new neighbors for modality %s on %s" % (mod, neighbor_dict[mod]['dim_red'])) - if type(mdata) is MuData: - adata=mdata[mod] - if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()): + if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in sdata["table"].obsm.keys()): L.info("X_pca not found, computing it using default parameters") - sc.tl.pca(adata) - if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None): - dimrem = int(neighbor_dict[mod]['dim_remove']) - adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:] - adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:] - if mod == "atac": - if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()): - L.info("X_lsi not found, computing it using default parameters") - lsi(adata=adata, num_components=50) - if neighbor_dict[mod]['dim_remove'] is not None: - L.info("Removing dimension %s from X_lsi" % neighbor_dict[mod]['dim_remove']) - dimrem = int(neighbor_dict[mod]['dim_remove']) - adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:] - adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:] - adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:] - - # run command + sc.tl.pca(sdata["table"]) opts = dict(method=neighbor_dict[mod]['method'], n_neighbors=int(neighbor_dict[mod]['k']), n_pcs=int(neighbor_dict[mod]['n_dim_red']), metric=neighbor_dict[mod]['metric'], nthreads=args.n_threads, use_rep=neighbor_dict[mod]['dim_red']) + # run command + run_neighbors_method_choice(sdata["table"],**opts) - run_neighbors_method_choice(adata,**opts) - mdata.mod[mod] = adata - mdata.update() - - +if ".zarr" in args.infile: + L.info("Saving updated SpatialData to '%s'" % args.outfile) + sdata.write(args.outfile) +else: + L.info("Saving updated MuData to '%s'" % args.outfile) + mdata.write(args.outfile) -L.info("Saving updated MuData to '%s'" % args.outfile) -mdata.write(args.outfile) -L.info("Done") \ No newline at end of file +L.info("Done") diff --git 
a/panpipes/python_scripts/run_clustering.py b/panpipes/python_scripts/run_clustering.py index fcd2e5c5..ee183c90 100644 --- a/panpipes/python_scripts/run_clustering.py +++ b/panpipes/python_scripts/run_clustering.py @@ -34,13 +34,20 @@ # read data L.info("Reading in data from '%s'" % args.infile) -mdata = mu.read(args.infile) -if type(mdata) is AnnData: - adata = mdata -elif args.modality is not None: - adata = mdata[args.modality] -else: - adata = mdata +if ".zarr" in args.infile: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + sdata = sd.read_zarr(args.infile) + adata = sdata["table"] +else: + mdata = mu.read(args.infile) + if type(mdata) is AnnData: + adata = mdata + elif args.modality is not None: + adata = mdata[args.modality] + else: + adata = mdata + uns_key=args.neighbors_key # check sc.pp.neihgbours has been run diff --git a/panpipes/python_scripts/run_find_markers_multi.py b/panpipes/python_scripts/run_find_markers_multi.py index ba1422d7..0ae1d067 100644 --- a/panpipes/python_scripts/run_find_markers_multi.py +++ b/panpipes/python_scripts/run_find_markers_multi.py @@ -201,19 +201,22 @@ def main(adata, L.info("Running with params: %s", args) # read data -L.info("Reading in MuData from '%s'" % args.infile) -mdata = read(args.infile) - - -if type(mdata) is AnnData: - adata = mdata - # main function only does rank_gene_groups on X, so -elif type(mdata) is MuData and args.modality is not None: - adata = mdata[args.modality] -else: - L.error("If the input is a MuData object, a modality needs to be specified") - sys.exit('If the input is a MuData object, a modality needs to be specified') - +if args.modality != "spatial": + L.info("Reading in MuData from '%s'" % args.infile) + mdata = read(args.infile) + if type(mdata) is AnnData: + adata = mdata + # main function only does rank_gene_groups on X, so + elif type(mdata) is MuData and args.modality is not None: + adata = mdata[args.modality] + else: + L.error("If the input is a MuData object, a modality needs to be specified") + sys.exit('If the input is a MuData object, a modality needs to be specified') +else: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + adata = sd.read_zarr(args.infile)["table"] + main(adata, mod=args.modality, diff --git a/panpipes/python_scripts/run_umap.py b/panpipes/python_scripts/run_umap.py index b70c19f3..112e9d6c 100644 --- a/panpipes/python_scripts/run_umap.py +++ b/panpipes/python_scripts/run_umap.py @@ -10,6 +10,7 @@ import muon as mu from anndata import AnnData + import sys import logging L = logging.getLogger() @@ -40,13 +41,19 @@ # read data L.info("Reading in data from '%s'" % args.infile) -mdata = mu.read(args.infile) -if type(mdata) is AnnData: - adata = mdata -elif args.modality is not None: - adata = mdata[args.modality] -else: - adata = mdata +if ".zarr" in args.infile: + import spatialdata as sd + L.info("Reading in SpatialData from '%s'" % args.infile) + sdata = sd.read_zarr(args.infile) + adata = sdata["table"] +else: + mdata = mu.read(args.infile) + if type(mdata) is AnnData: + adata = mdata + elif args.modality is not None: + adata = mdata[args.modality] + else: + adata = mdata # set seed From 5cb183bea023002a14efe4eb324fbca33c3c166b Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:05:23 +0100 Subject: [PATCH 48/57] update to spatialData --- docs/usage/setup_for_spatial_workflows.md | 67 ++++++++++++++++++----- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git 
a/docs/usage/setup_for_spatial_workflows.md b/docs/usage/setup_for_spatial_workflows.md index 248c33b9..2959dd9f 100644 --- a/docs/usage/setup_for_spatial_workflows.md +++ b/docs/usage/setup_for_spatial_workflows.md @@ -1,28 +1,67 @@ Sample submission file for the ingestion of spatial data =========================== -The spatial transcriptomics ingestion workflow requires a sample submission file that specifies the location of the input files. The sample submission file is a tab-separated file with one row per sample. Panpipes currently supports the ingestion of `Visium` and `Vizgen` data. +The spatial transcriptomics ingestion workflow requires a sample submission file that specifies the location of the input files. The sample submission file is a tab-separated file with one row per sample. Panpipes currently supports the ingestion of `Visium`, `Vizgen`, and `Xenium` data. The data of different technologies needs to be ingested separately with different sample submission files. -The 6 columns of the sample submission file are: + +The minimum required (non-optional) columns for each submission file are **sample id**: Unique sample ID. -**spatial_path**: The root directory containing the data files. Please note, that the folder structure of the root directory needs to be structured as expected by the [squidpy.read.visium](https://squidpy.readthedocs.io/en/stable/api/squidpy.read.visium.html) (for `Visium` data) or [squidpy.read.vizgen](https://squidpy.readthedocs.io/en/stable/api/squidpy.read.vizgen.html) (for `Vizgen` data) functions. +**spatial_path**: The root directory containing the data files. Please note, that the folder structure of the root directory needs to be structured as expected by the [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) (for `Visium` data), [spatialdata_io.merscope](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html) (for `Vizgen` data), or [spatialdata_io.xenium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.xenium.html) (for `Xenium` data) functions. + +**spatial_filetype**: Either "vizgen", "visium", or "xenium". + + +## Visium + +The 7 columns of the Visium sample submission file are: + +sample_id | spatial_path | spatial_filetype | visium_feature_bc_matrix | visium_fullres_image_file | visium_tissue_positions_file | visium_scalefactors_file +----------|----------|------------|-----------|----------|-------------|------------- + +The following 4 columns are **optional**: + +**visium_feature_bc_matrix**: Name of the counts file. Corresponds to the `counts_file` parameter of [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) + +**visium_fullres_image_file**: Path to the full-resolution image. Corresponds to the `fullres_image_file` parameter of [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) + +**visium_tissue_positions_file**: Path to the tissue positions file. Corresponds to the `tissue_positions_file` parameter of [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) + +**visium_scalefactors_file**: Path to the scalefactors file. 
Corresponds to the `scalefactors_file` parameter of [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) + +#### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt) + + +## Vizgen + +The 6 columns of the Vizgen sample submission file are: + +sample_id | spatial_path | spatial_filetype | vpt_cell_by_gene | vpt_cell_metadata | vpt_cell_boundaries +----------|----------|------------|----------|-------------|------------- + +The following 3 columns are **optional**: + +**vpt_cell_by_gene**: The file name of the output of the vizgen-postprocessing-tool. See [spatialdata_io.merscope](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html) + +**vpt_cell_metadata**: The file name of the output of the vizgen-postprocessing-tool. See [spatialdata_io.merscope](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html) + +**vpt_cell_boundaries**: The file name of the output of the vizgen-postprocessing-tool. See [spatialdata_io.merscope](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html) + + +#### Example submission files [MERFISH](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merfish_data/sample_file_qc_merfish.txt) [MERSCOPE](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merscope_data/sample_file_qc_merscope.txt) + +## Xenium + +The 3 columns of the Xenium sample submission file are: + +sample_id | spatial_path | spatial_filetype | +----------|----------|------------ -**spatial_filetype**: Either "vizgen" or "visium". +#### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_xenium_data/sample_file_qc_xenium.txt) -**spatial_counts**: The count matrix file. Usually `filtered_feature_bc_matrix.h5` or `raw_feature_bc_matrix.h5` for a `Visium` dataset. For `Vizgen` inputs, this file typically ends with `_cell_by_gene.csv.` -**spatial_metadata**: The metadata csv-file for `Vizgen` data. Leave empty for `Visium` data. -**spatial_transformation**: The transformation csv-file for `Vizgen` data. This column is **optional** for `Vizgen` data. Leave empty for `Visium` data. -**Note, that the columns, `sample_id`, `spatial_path`, `spatial_filetype`, and `spatial_counts` are required for both, `Visium` and `Vizgen` data. 
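A small, hypothetical Visium submission table written with pandas illustrates the column layout described above; only the three required columns plus one optional Visium column are filled in, and all paths are placeholders:

```python
import pandas as pd

submission = pd.DataFrame(
    {
        "sample_id": ["V1_Human_Heart", "V1_Human_Lymph_Node"],
        "spatial_path": ["./data/V1_Human_Heart", "./data/V1_Human_Lymph_Node"],
        "spatial_filetype": ["visium", "visium"],
        "visium_feature_bc_matrix": [
            "V1_Human_Heart_filtered_feature_bc_matrix.h5",
            "V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5",
        ],
    }
)
# the workflow expects a tab-separated file
submission.to_csv("sample_file_qc_visium.txt", sep="\t", index=False)
```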
The `spatial_metadata`(required) and `spatial_transformation`(optional) columns are `Vizgen`-specific and should be left empty for `Visium` data.** -### Example submission file -| sample_id | spatial_path | spatial_filetype | spatial_counts | spatial_metadata | spatial_transformation | -| --------- |--------------|------------------|-----------------------------------------|------------------------------------------|--------------------| -| V1_Human_Heart |./data_visium/V1_Human_Heart |visium |V1_Human_Heart_filtered_feature_bc_matrix.h5 | -| V1_Human_Lymph_Node |./data_visium/V1_Human_Lymph_Node| visium | V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5 | -Mouse_Brain | ./data_vizgen | vizgen | cell_by_gene_S1R1.csv | cell_metadata_S1R1.csv | images_micron_to_mosaic_pixel_transform.csv From 96695cfbff37ce6e2cfb168e83bb309a706721d2 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:13:13 +0100 Subject: [PATCH 49/57] check if columns exist --- panpipes/funcs/io.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index af41d6ff..0a46f205 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -171,16 +171,19 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): visium_tissue_positions_file = None visium_scalefactors_file = None spatial_filetype = caf['spatial_filetype'][nn] - if pd.notna(caf['vpt_cell_by_gene'][nn]): - vpt_cell_by_gene = caf['vpt_cell_by_gene'][nn] + if "vpt_cell_by_gene" in caf[nn].columns: + if pd.notna(caf['vpt_cell_by_gene'][nn]): + vpt_cell_by_gene = caf['vpt_cell_by_gene'][nn] else: vpt_cell_by_gene = None - if pd.notna(caf['vpt_cell_metadata'][nn]): - vpt_cell_metadata = caf['vpt_cell_metadata'][nn] + if "vpt_cell_metadata" in caf[nn].columns: + if pd.notna(caf['vpt_cell_metadata'][nn]): + vpt_cell_metadata = caf['vpt_cell_metadata'][nn] else: vpt_cell_metadata = None - if pd.notna(caf['vpt_cell_boundaries'][nn]): - vpt_cell_boundaries = caf['vpt_cell_boundaries'][nn] + if "vpt_cell_boundaries" in caf[nn].columns: + if pd.notna(caf['vpt_cell_boundaries'][nn]): + vpt_cell_boundaries = caf['vpt_cell_boundaries'][nn] else: vpt_cell_boundaries = None elif caf['spatial_filetype'][nn]=="visium": @@ -189,23 +192,27 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): vpt_cell_boundaries = None spatial_filetype = caf['spatial_filetype'][nn] #counts file - if pd.notna(caf["visium_feature_bc_matrix"][nn]): - visium_feature_bc_matrix= caf["visium_feature_bc_matrix"][nn] + if "visium_feature_bc_matrix" in caf[nn].columns: + if pd.notna(caf["visium_feature_bc_matrix"][nn]): + visium_feature_bc_matrix= caf["visium_feature_bc_matrix"][nn] else: visium_feature_bc_matrix = None # fullres image - if pd.notna(caf["visium_fullres_image_file"][nn]): - visium_fullres_image_file= caf["visium_fullres_image_file"][nn] + if "visium_fullres_image_file" in caf[nn].columns: + if pd.notna(caf["visium_fullres_image_file"][nn]): + visium_fullres_image_file= caf["visium_fullres_image_file"][nn] else: visium_fullres_image_file = None # tissue position - if pd.notna(caf["visium_tissue_positions_file"][nn]): - visium_tissue_positions_file= caf["visium_tissue_positions_file"][nn] + if "visium_tissue_positions_file" in caf[nn].columns: + if pd.notna(caf["visium_tissue_positions_file"][nn]): + visium_tissue_positions_file= caf["visium_tissue_positions_file"][nn] else: visium_tissue_positions_file = None # scalefactor - if 
pd.notna(caf["visium_scalefactors_file"][nn]): - visium_scalefactors_file= caf["visium_scalefactors_file"][nn] + if "visium_scalefactors_file" in caf[nn].columns: + if pd.notna(caf["visium_scalefactors_file"][nn]): + visium_scalefactors_file= caf["visium_scalefactors_file"][nn] else: visium_scalefactors_file = None else: From 4d873f270d76ad6f52c9ed39a172631048aa93c1 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:22:13 +0100 Subject: [PATCH 50/57] remove index --- panpipes/funcs/io.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index 0a46f205..780228c6 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -171,17 +171,17 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): visium_tissue_positions_file = None visium_scalefactors_file = None spatial_filetype = caf['spatial_filetype'][nn] - if "vpt_cell_by_gene" in caf[nn].columns: + if "vpt_cell_by_gene" in caf.columns: if pd.notna(caf['vpt_cell_by_gene'][nn]): vpt_cell_by_gene = caf['vpt_cell_by_gene'][nn] else: vpt_cell_by_gene = None - if "vpt_cell_metadata" in caf[nn].columns: + if "vpt_cell_metadata" in caf.columns: if pd.notna(caf['vpt_cell_metadata'][nn]): vpt_cell_metadata = caf['vpt_cell_metadata'][nn] else: vpt_cell_metadata = None - if "vpt_cell_boundaries" in caf[nn].columns: + if "vpt_cell_boundaries" in caf.columns: if pd.notna(caf['vpt_cell_boundaries'][nn]): vpt_cell_boundaries = caf['vpt_cell_boundaries'][nn] else: @@ -192,25 +192,25 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): vpt_cell_boundaries = None spatial_filetype = caf['spatial_filetype'][nn] #counts file - if "visium_feature_bc_matrix" in caf[nn].columns: + if "visium_feature_bc_matrix" in caf.columns: if pd.notna(caf["visium_feature_bc_matrix"][nn]): visium_feature_bc_matrix= caf["visium_feature_bc_matrix"][nn] else: visium_feature_bc_matrix = None # fullres image - if "visium_fullres_image_file" in caf[nn].columns: + if "visium_fullres_image_file" in caf.columns: if pd.notna(caf["visium_fullres_image_file"][nn]): visium_fullres_image_file= caf["visium_fullres_image_file"][nn] else: visium_fullres_image_file = None # tissue position - if "visium_tissue_positions_file" in caf[nn].columns: + if "visium_tissue_positions_file" in caf.columns: if pd.notna(caf["visium_tissue_positions_file"][nn]): visium_tissue_positions_file= caf["visium_tissue_positions_file"][nn] else: visium_tissue_positions_file = None # scalefactor - if "visium_scalefactors_file" in caf[nn].columns: + if "visium_scalefactors_file" in caf.columns: if pd.notna(caf["visium_scalefactors_file"][nn]): visium_scalefactors_file= caf["visium_scalefactors_file"][nn] else: From 39d3c5730ba26474321233b4af08d3f0f63b60c9 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:31:43 +0100 Subject: [PATCH 51/57] fix bug --- panpipes/funcs/io.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/panpipes/funcs/io.py b/panpipes/funcs/io.py index 780228c6..b5d635ab 100644 --- a/panpipes/funcs/io.py +++ b/panpipes/funcs/io.py @@ -171,22 +171,23 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): visium_tissue_positions_file = None visium_scalefactors_file = None spatial_filetype = caf['spatial_filetype'][nn] + vpt_cell_by_gene = None + vpt_cell_metadata = None + vpt_cell_boundaries = None if "vpt_cell_by_gene" in caf.columns: if pd.notna(caf['vpt_cell_by_gene'][nn]): vpt_cell_by_gene = 
caf['vpt_cell_by_gene'][nn] - else: - vpt_cell_by_gene = None if "vpt_cell_metadata" in caf.columns: if pd.notna(caf['vpt_cell_metadata'][nn]): vpt_cell_metadata = caf['vpt_cell_metadata'][nn] - else: - vpt_cell_metadata = None if "vpt_cell_boundaries" in caf.columns: if pd.notna(caf['vpt_cell_boundaries'][nn]): vpt_cell_boundaries = caf['vpt_cell_boundaries'][nn] - else: - vpt_cell_boundaries = None elif caf['spatial_filetype'][nn]=="visium": + visium_feature_bc_matrix = None + visium_fullres_image_file = None + visium_tissue_positions_file = None + visium_scalefactors_file = None vpt_cell_by_gene = None vpt_cell_metadata = None vpt_cell_boundaries = None @@ -195,26 +196,18 @@ def gen_load_spatial_jobs(caf, mode_dictionary = {}, load_raw=True): if "visium_feature_bc_matrix" in caf.columns: if pd.notna(caf["visium_feature_bc_matrix"][nn]): visium_feature_bc_matrix= caf["visium_feature_bc_matrix"][nn] - else: - visium_feature_bc_matrix = None # fullres image if "visium_fullres_image_file" in caf.columns: if pd.notna(caf["visium_fullres_image_file"][nn]): visium_fullres_image_file= caf["visium_fullres_image_file"][nn] - else: - visium_fullres_image_file = None # tissue position if "visium_tissue_positions_file" in caf.columns: if pd.notna(caf["visium_tissue_positions_file"][nn]): visium_tissue_positions_file= caf["visium_tissue_positions_file"][nn] - else: - visium_tissue_positions_file = None # scalefactor if "visium_scalefactors_file" in caf.columns: if pd.notna(caf["visium_scalefactors_file"][nn]): - visium_scalefactors_file= caf["visium_scalefactors_file"][nn] - else: - visium_scalefactors_file = None + visium_scalefactors_file= caf["visium_scalefactors_file"][nn] else: spatial_path= None spatial_filetype = None From 775e3346267d34306cf0f1594942ab4e7b7a3179 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:39:39 +0100 Subject: [PATCH 52/57] use zip files --- .github/workflows/spatial_preprocess-ci.yml | 64 +++++---------------- 1 file changed, 15 insertions(+), 49 deletions(-) diff --git a/.github/workflows/spatial_preprocess-ci.yml b/.github/workflows/spatial_preprocess-ci.yml index fee80bce..ce322ee6 100644 --- a/.github/workflows/spatial_preprocess-ci.yml +++ b/.github/workflows/spatial_preprocess-ci.yml @@ -55,60 +55,21 @@ jobs: - name: Preparing the data run: | - mkdir spatial spatial/ingestion spatial/ingestion/data - cd spatial/ingestion/data - mkdir V1_Human_Heart V1_Human_Lymph_Node - cd V1_Human_Heart - curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_filtered_feature_bc_matrix.h5 - curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_spatial.tar.gz - tar -xf V1_Human_Heart_spatial.tar.gz - cd ../V1_Human_Lymph_Node - curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5 - curl -O https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_spatial.tar.gz - tar -xf V1_Human_Lymph_Node_spatial.tar.gz + mkdir spatial spatial/preprocess spatial/preprocess/data + cd spatial/preprocess/data + + curl -L -o V1_Human_Heart_unfilt.zarr.zip https://figshare.com/ndownloader/files/52236521 + unzip V1_Human_Heart_unfilt.zarr.zip + rm V1_Human_Heart_unfilt.zarr.zip + curl -L -o V1_Human_Lymph_Node_unfilt.zarr.zip https://figshare.com/ndownloader/files/52236575 + unzip V1_Human_Lymph_Node_unfilt.zarr.zip + rm V1_Human_Lymph_Node_unfilt.zarr.zip + # Note: we run the following to test 
that the commands works - name: Preparing the configuration file shell: bash -el {0} run: | - cd spatial/ingestion - panpipes qc_spatial config - - - name: Preparing the submission file - run: | - cd spatial/ingestion - curl -o sample_file_qc_visium.txt https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt - - name: Preparing the yaml file - run: | - cd spatial/ingestion - curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/sarah_spatialData/docs/ingesting_visium_data/pipeline.yml - - - name: File tree - if: env.debug == 'true' - run: tree spatial/ingestion - - - name: Review pipeline tasks - shell: bash -el {0} - run: | - cd spatial/ingestion - panpipes qc_spatial show full --local - - - name: Run pipeline tasks - shell: bash -el {0} - run: | - cd spatial/ingestion - panpipes qc_spatial make full --local - - - name: File tree - if: env.debug == 'true' - run: tree spatial/ingestion - - - # Note: we run the following to test that the commands works - - name: Preparing the configuration file - shell: bash -el {0} - run: | - mkdir spatial/preprocess cd spatial/preprocess panpipes preprocess_spatial config @@ -117,6 +78,11 @@ jobs: cd spatial/preprocess curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes-tutorials/main/docs/preprocess_spatial_data/pipeline.yml + - name: Replace template contents in configuration file + run: | + cd spatial/preprocess + sed -i 's+../ingestion/qc.data/+./data/+g' pipeline.yml + - name: File tree if: env.debug == 'true' run: tree spatial/preprocess From 66890d4e61f03f189b79f920ca29f992e8a6d6db Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:52:12 +0100 Subject: [PATCH 53/57] change function and parameter names --- .../panpipes/pipeline_preprocess_spatial.py | 20 ++++++++--------- panpipes/panpipes/pipeline_qc_spatial.py | 16 +++++++------- panpipes/python_scripts/plot_qc_spatial.py | 12 +++++----- panpipes/python_scripts/run_filter_spatial.py | 22 +++++++++---------- .../python_scripts/run_preprocess_spatial.py | 20 ++++++++--------- 5 files changed, 45 insertions(+), 45 deletions(-) diff --git a/panpipes/panpipes/pipeline_preprocess_spatial.py b/panpipes/panpipes/pipeline_preprocess_spatial.py index d7a4053a..dbed8c9c 100644 --- a/panpipes/panpipes/pipeline_preprocess_spatial.py +++ b/panpipes/panpipes/pipeline_preprocess_spatial.py @@ -52,7 +52,7 @@ def gen_filter_jobs(): @mkdir("tables") @mkdir("filtered.data") @files(gen_filter_jobs) -def filter_mudata(infile_path,outfile): +def filter_spatialdata(infile_path,outfile): print('processing file = %s' % str(infile_path)) log_file = os.path.basename(outfile) log_file= "1_filtering."+log_file.replace("filtered.zarr","") + ".log" @@ -61,15 +61,15 @@ def filter_mudata(infile_path,outfile): filter_dict = dictionary_stripper(PARAMS['filtering']) cmd = """ python %(py_path)s/run_filter_spatial.py - --input_mudata %(infile_path)s - --output_mudata %(outfile)s + --input_spatialdata %(infile_path)s + --output_spatialdata %(outfile)s --filter_dict "%(filter_dict)s" """ if PARAMS['filtering_keep_barcodes'] is not None: cmd += " --keep_barcodes %(filtering_keep_barcodes)s" cmd += " > logs/%(log_file)s " job_kwargs["job_threads"] = PARAMS['resources_threads_low'] - log_msg = f"TASK: 'filter_mudata'" + f" IN CASE OF ERROR, PLEASE REFER TO : 'logs/{log_file}' FOR MORE INFORMATION." 
+ log_msg = f"TASK: 'filter_spatialdata'" + f" IN CASE OF ERROR, PLEASE REFER TO : 'logs/{log_file}' FOR MORE INFORMATION." get_logger().info(log_msg) P.run(cmd, **job_kwargs) @@ -84,7 +84,7 @@ def run_plotqc_query(pqc_dict): @active_if(run_plotqc_query(PARAMS['plotqc'])) @active_if(PARAMS['filtering_run']) -@transform(filter_mudata, +@transform(filter_spatialdata, regex("./filtered.data/(.*)_filtered.zarr"), r"./logs/2_postfilterplot.\1.log") def postfilterplot_spatial(filt_file,log_file): @@ -93,7 +93,7 @@ def postfilterplot_spatial(filt_file,log_file): spatial_filetype = PARAMS["assay"] cmd = """ python %(py_path)s/plot_qc_spatial.py - --input_mudata %(filt_file)s + --input_spatialdata %(filt_file)s --spatial_filetype %(spatial_filetype)s --figdir ./figures/spatial """ @@ -108,7 +108,7 @@ def postfilterplot_spatial(filt_file,log_file): P.run(cmd, **job_kwargs) -@transform(filter_mudata, +@transform(filter_spatialdata, regex("./filtered.data/(.*)_filtered.zarr"), r"./logs/3_preprocess.\1.log") def spatial_preprocess(filt_file,log_file): @@ -119,8 +119,8 @@ def spatial_preprocess(filt_file,log_file): write_output = os.path.join("./tmp/",os.path.basename(filt_file)) cmd = """ python %(py_path)s/run_preprocess_spatial.py - --input_mudata %(filt_file)s - --output_mudata %(write_output)s + --input_spatialdata %(filt_file)s + --output_spatialdata %(write_output)s --figdir ./figures/spatial """ if PARAMS['spatial_norm_hvg_flavour'] is not None: @@ -154,7 +154,7 @@ def spatial_preprocess(filt_file,log_file): get_logger().info(log_msg) P.run(cmd, **job_kwargs) -@follows(filter_mudata, postfilterplot_spatial, spatial_preprocess) +@follows(filter_spatialdata, postfilterplot_spatial, spatial_preprocess) @originate("cleanup_done.txt") def cleanup(file): # remove any ctmp fails diff --git a/panpipes/panpipes/pipeline_qc_spatial.py b/panpipes/panpipes/pipeline_qc_spatial.py index 3bc90556..6342e8f7 100644 --- a/panpipes/panpipes/pipeline_qc_spatial.py +++ b/panpipes/panpipes/pipeline_qc_spatial.py @@ -56,7 +56,7 @@ def set_up_dirs(log_file): pass # ----------------------------------------------------------------------------------------------- -## Creating h5mu from filtered data files +## Creating spatialData from filtered data files # ----------------------------------------------------------------------------------------------- @@ -73,7 +73,7 @@ def gen_load_spatial_anndata_jobs(): @follows(mkdir("logs")) @follows(mkdir("tmp")) @files(gen_load_spatial_anndata_jobs) -def load_mudatas(spatial_path, outfile, +def load_spatialdatas(spatial_path, outfile, sample_id, spatial_filetype, visium_feature_bc_matrix, visium_fullres_image_file, visium_tissue_positions_file, visium_scalefactors_file, vpt_cell_by_gene, vpt_cell_metadata, vpt_cell_boundaries): @@ -119,19 +119,19 @@ def load_mudatas(spatial_path, outfile, --vpt_cell_metadata %(vpt_cell_metadata)s --vpt_cell_boundaries %(vpt_cell_boundaries)s """ - cmd += " > logs/1_make_mudatas_%(sample_id)s.log" + cmd += " > logs/1_make_spatialdatas_%(sample_id)s.log" job_kwargs["job_threads"] = PARAMS['resources_threads_medium'] - log_msg = f"TASK: 'load_mudatas'" + f" IN CASE OF ERROR, PLEASE REFER TO : 'logs/1_make_mudatas_{sample_id}.log' FOR MORE INFORMATION." + log_msg = f"TASK: 'load_spatialdatas'" + f" IN CASE OF ERROR, PLEASE REFER TO : 'logs/1_make_spatialdatas_{sample_id}.log' FOR MORE INFORMATION." 
get_logger().info(log_msg) P.run(cmd, **job_kwargs) -@follows(load_mudatas) +@follows(load_spatialdatas) @follows(mkdir("qc.data")) @follows(mkdir("./figures")) -@transform(load_mudatas, +@transform(load_spatialdatas, regex("./tmp/(.*)_raw.zarr"), r"./logs/2_spatialQC_\1.log") def spatialQC(infile,log_file): @@ -179,7 +179,7 @@ def run_plotqc_query(pqc_dict): @follows(spatialQC) @follows(mkdir("./figures/spatial")) @active_if(run_plotqc_query(PARAMS['plotqc'])) -@transform(load_mudatas, +@transform(load_spatialdatas, regex("./tmp/(.*)_raw.zarr"), r"./logs/3_qcplot.\1.log") def plotQC_spatial(unfilt_file,log_file): @@ -188,7 +188,7 @@ def plotQC_spatial(unfilt_file,log_file): unfilt_file = unfilt_file.replace("tmp", "qc.data") cmd = """ python %(py_path)s/plot_qc_spatial.py - --input_mudata %(unfilt_file)s + --input_spatialdata %(unfilt_file)s --spatial_filetype %(spatial_filetype)s --figdir ./figures/spatial """ diff --git a/panpipes/python_scripts/plot_qc_spatial.py b/panpipes/python_scripts/plot_qc_spatial.py index 558219c0..205bf74a 100644 --- a/panpipes/python_scripts/plot_qc_spatial.py +++ b/panpipes/python_scripts/plot_qc_spatial.py @@ -28,8 +28,8 @@ parser = argparse.ArgumentParser() -parser.add_argument("--input_mudata", - default="mudata_unfilt.h5mu", +parser.add_argument("--input_spatialdata", + default="spatialdata_unfilt.h5mu", help="") parser.add_argument("--figdir", default="./figures/", @@ -58,12 +58,12 @@ sc.settings.figdir = figdir sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5)) -L.info("Reading in SpatialData from '%s'" % args.input_mudata) -sdata = sd.read_zarr(args.input_mudata) -#mdata = mu.read(args.input_mudata) +L.info("Reading in SpatialData from '%s'" % args.input_spatialdata) +sdata = sd.read_zarr(args.input_spatialdata) +#mdata = mu.read(args.input_spatialdata) #spatial = mdata.mod['spatial'] -input_data = os.path.basename(args.input_mudata) +input_data = os.path.basename(args.input_spatialdata) pattern = r"_filtered.zarr" match = re.search(pattern, input_data) if match is None: diff --git a/panpipes/python_scripts/run_filter_spatial.py b/panpipes/python_scripts/run_filter_spatial.py index 733c8cad..c3a05c91 100644 --- a/panpipes/python_scripts/run_filter_spatial.py +++ b/panpipes/python_scripts/run_filter_spatial.py @@ -42,10 +42,10 @@ def test_matching_df_ignore_cat(new_df, old_df): # parse arguments parser = argparse.ArgumentParser() -parser.add_argument('--input_mudata', +parser.add_argument('--input_spatialdata', default='gut_minus1_amp.h5ad', help='') -parser.add_argument('--output_mudata', +parser.add_argument('--output_spatialdata', default='', help='') parser.add_argument('--filter_dict', @@ -53,7 +53,7 @@ def test_matching_df_ignore_cat(new_df, old_df): help='this is pull') # cross modalities args parser.add_argument('--keep_barcodes', default=None, - help='1 column list of barcodes to keep, note that they should match the mudata input, this filtering happens first') + help='1 column list of barcodes to keep, note that they should match the spatialdata input, this filtering happens first') # load options @@ -73,14 +73,14 @@ def test_matching_df_ignore_cat(new_df, old_df): filter_dict = dictionary_stripper(filter_dict) L.info("Filter dictionary:\n %s" %filter_dict) -# load mudata +# load spatialdata -L.info("Reading in SpatialData from '%s'" % args.input_mudata) -sdata = sd.read_zarr(args.input_mudata) -#mdata = mu.read(args.input_mudata) +L.info("Reading in SpatialData from '%s'" % args.input_spatialdata) 
+sdata = sd.read_zarr(args.input_spatialdata) +#mdata = mu.read(args.input_spatialdata) #if isinstance(mdata, AnnData): -# raise TypeError("Input '%s' should be of MuData format, not Anndata" % args.input_mudata) +# raise TypeError("Input '%s' should be of spatialdata format, not Anndata" % args.input_spatialdata) orig_obs = sdata["table"].obs.copy() @@ -147,7 +147,7 @@ def test_matching_df_ignore_cat(new_df, old_df): assert test_matching_df_ignore_cat(sdata["table"].obs, orig_obs) # write out obs -output_prefix = re.sub(".zarr", "", os.path.basename(args.output_mudata)) +output_prefix = re.sub(".zarr", "", os.path.basename(args.output_spatialdata)) L.info("Saving updated obs in a metadata tsv file to './tables/" + output_prefix + "_filtered_cell_metadata.tsv'") write_obs(sdata["table"], output_prefix=os.path.join("tables/",output_prefix), output_suffix="_filtered_cell_metadata.tsv") @@ -166,8 +166,8 @@ def test_matching_df_ignore_cat(new_df, old_df): #mdata.update() -L.info("Saving updated SpatialData to '%s'" % args.output_mudata) -sdata.write(args.output_mudata) +L.info("Saving updated SpatialData to '%s'" % args.output_spatialdata) +sdata.write(args.output_spatialdata) L.info("Done") diff --git a/panpipes/python_scripts/run_preprocess_spatial.py b/panpipes/python_scripts/run_preprocess_spatial.py index 5e389e04..250057ec 100644 --- a/panpipes/python_scripts/run_preprocess_spatial.py +++ b/panpipes/python_scripts/run_preprocess_spatial.py @@ -32,11 +32,11 @@ parser = argparse.ArgumentParser() -parser.add_argument("--input_mudata", - default="mudata_unfilt.h5mu", +parser.add_argument("--input_spatialdata", + default="spatialdata_unfilt.h5mu", help="") -parser.add_argument("--output_mudata", - default="mudata_unfilt.h5mu", +parser.add_argument("--output_spatialdata", + default="spatialdata_unfilt.h5mu", help="") parser.add_argument("--figdir", default="./figures/", @@ -89,12 +89,12 @@ sc.settings.figdir = figdir sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5)) -L.info("Reading in SpatialData from '%s'" % args.input_mudata) -sdata = sd.read_zarr(args.input_mudata) -#mdata = mu.read(args.input_mudata) +L.info("Reading in SpatialData from '%s'" % args.input_spatialdata) +sdata = sd.read_zarr(args.input_spatialdata) +#mdata = mu.read(args.input_spatialdata) #spatial = mdata.mod['spatial'] -input_data = os.path.basename(args.input_mudata) +input_data = os.path.basename(args.input_spatialdata) pattern = r"_filtered.zarr" match = re.search(pattern, input_data) sprefix = input_data[:match.start()] @@ -174,8 +174,8 @@ #mdata.update() -L.info("Saving updated SpatialData to '%s'" % args.output_mudata) -sdata.write(args.output_mudata) +L.info("Saving updated SpatialData to '%s'" % args.output_spatialdata) +sdata.write(args.output_spatialdata) L.info("Done") From 33259c3afcc5491c0042b0d06c6121835c3a19af Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 10:55:22 +0100 Subject: [PATCH 54/57] decrease font size --- docs/usage/setup_for_spatial_workflows.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage/setup_for_spatial_workflows.md b/docs/usage/setup_for_spatial_workflows.md index 2959dd9f..456d519f 100644 --- a/docs/usage/setup_for_spatial_workflows.md +++ b/docs/usage/setup_for_spatial_workflows.md @@ -30,7 +30,7 @@ The following 4 columns are **optional**: **visium_scalefactors_file**: Path to the scalefactors file. 
Corresponds to the `scalefactors_file` parameter of [spatialdata_io.visium](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.visium.html) -#### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt) +##### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_visium_data/sample_file_qc_visium.txt) ## Vizgen @@ -49,7 +49,7 @@ The following 3 columns are **optional**: **vpt_cell_boundaries**: The file name of the output of the vizgen-postprocessing-tool. See [spatialdata_io.merscope](https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.merscope.html) -#### Example submission files [MERFISH](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merfish_data/sample_file_qc_merfish.txt) [MERSCOPE](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merscope_data/sample_file_qc_merscope.txt) +##### Example submission files [MERFISH](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merfish_data/sample_file_qc_merfish.txt) [MERSCOPE](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_merscope_data/sample_file_qc_merscope.txt) ## Xenium @@ -58,7 +58,7 @@ The 3 columns of the Xenium sample submission file are: sample_id | spatial_path | spatial_filetype | ----------|----------|------------ -#### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_xenium_data/sample_file_qc_xenium.txt) +##### [Example submission file](https://github.com/DendrouLab/panpipes-tutorials/blob/sarah_spatialData/docs/ingesting_xenium_data/sample_file_qc_xenium.txt) From b6844b560e4da46eb7890f56222e791902f623c5 Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 10 Feb 2025 13:05:22 +0100 Subject: [PATCH 55/57] add changes to changelog --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ac8d32d..4baadcb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,24 @@ ## [latest] +### added +- moved from MuData to SpatialData +- xenium ingestion & ingest_xenium GitHub action +- `export_gene_by_spot` for Cell2Location +- separate sample submission files for the different spatial technologies +- separate GitHub actions for Cell2Location & Tangram +- separate GitHub actions for MERSCOPE & MERFISH + + +### fixed + + +### dependencies +- pinned "spatialdata==0.2.6", "spatialdata-io==0.1.6", "dask==2024.12.1" as temporary fix + + +## v1.1.0 + ### added ### fixed From bbf99ccf7f5f89516f7a7f10e791fdba5ce14bab Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Wed, 12 Feb 2025 15:26:14 +0100 Subject: [PATCH 56/57] update comment --- panpipes/python_scripts/make_spatialData_from_csv.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/panpipes/python_scripts/make_spatialData_from_csv.py b/panpipes/python_scripts/make_spatialData_from_csv.py index d6c85ac6..d9d8a02e 100644 --- a/panpipes/python_scripts/make_spatialData_from_csv.py +++ b/panpipes/python_scripts/make_spatialData_from_csv.py @@ -13,11 +13,8 @@ import os from pathlib import Path """ -this script copies the make_adata_from_csv.py that creates -ONE MUDATA PER SAMPLE, with in each ONE LAYER per modality -for cell-suspension, saves them to temp. 
-concatenation of the mudatas saved in tmp happens
-in the concat_anndata.py script
+This script is an adjustment of the make_adata_from_csv.py. It creates
+ONE SPATIALDATA PER SAMPLE and saves them to temp.
 """
 
 import sys

From a96ea4ca5806bda45c56ce375f38d43e8191c601 Mon Sep 17 00:00:00 2001
From: SarahOuologuem
Date: Wed, 12 Feb 2025 15:34:31 +0100
Subject: [PATCH 57/57] update logging info

---
 panpipes/python_scripts/run_cell2location.py | 2 +-
 panpipes/python_scripts/run_tangram.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/panpipes/python_scripts/run_cell2location.py b/panpipes/python_scripts/run_cell2location.py
index 7cb427d8..9be32a1d 100644
--- a/panpipes/python_scripts/run_cell2location.py
+++ b/panpipes/python_scripts/run_cell2location.py
@@ -207,7 +207,7 @@
 
 #1. read in the data
 #spatial:
-L.info("Reading in spatial SpatialData from '%s'" % args.input_spatial)
+L.info("Reading in SpatialData from '%s'" % args.input_spatial)
 sdata_st = sd.read_zarr(args.input_spatial)
 #mdata_spatial = mu.read(args.input_spatial)
 #adata_st = mdata_spatial.mod['spatial']
diff --git a/panpipes/python_scripts/run_tangram.py b/panpipes/python_scripts/run_tangram.py
index 28eaeb0d..6f545771 100644
--- a/panpipes/python_scripts/run_tangram.py
+++ b/panpipes/python_scripts/run_tangram.py
@@ -101,7 +101,7 @@
 
 #1. read in the data
 #spatial:
-L.info("Reading in spatial SpatialData from '%s'" % args.input_spatial)
+L.info("Reading in SpatialData from '%s'" % args.input_spatial)
 sdata_st = sd.read_zarr(args.input_spatial)
 #mdata_spatial = mu.read(args.input_spatial)
 #adata_st = mdata_spatial.mod['spatial']
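
For illustration only (not part of the patch series): a minimal sketch of the SpatialData read/annotate/write round trip that the updated *_spatial scripts follow. It uses only calls that already appear in the diffs above (spatialdata.read_zarr, the "table" element, SpatialData.write); the Zarr path and sample id are placeholder assumptions.

    # Minimal sketch, assuming a per-sample SpatialData Zarr store produced by the
    # qc_spatial step; "sample1_unfilt.zarr" and "sample1" are placeholders.
    import spatialdata as sd

    sdata = sd.read_zarr("sample1_unfilt.zarr")   # load the per-sample SpatialData
    table = sdata["table"]                        # AnnData table holding counts and .obs
    table.obs["sample_id"] = "sample1"            # annotate cells/spots with the sample id
    sdata.write("sample1_annotated.zarr")         # persist everything to a new Zarr store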