Skip to content

Commit

Permalink
add log info
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahOuologuem committed Apr 18, 2024
1 parent fcc0475 commit de6ae27
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 140 deletions.
4 changes: 2 additions & 2 deletions panpipes/python_scripts/batch_correct_bbknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
sc.tl.pca(adata, n_comps=min(50,adata.var.shape[0]-1), svd_solver='arpack', random_state=0)

if "X_pca" not in adata.obsm:
L.info("X_pca could not be found in adata.obsm. Computing PCA with default parameters.")
L.warning("X_pca could not be found in adata.obsm. Computing PCA with default parameters.")
n_pcs = 50
if adata.var.shape[0] < n_pcs:
L.info("You have less features than number of PCs you intend to calculate")
Expand Down Expand Up @@ -103,7 +103,7 @@
n_pcs = int(args.neighbors_n_pcs),
neighbors_within_batch=nnb) # calculates the neighbours

L.info("Calculating UMAP")
L.info("Computing UMAP")
sc.tl.umap(adata)

# write out
Expand Down
31 changes: 15 additions & 16 deletions panpipes/python_scripts/batch_correct_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,12 @@

args, opt = parser.parse_known_args()


L.info("reading data and starting integration pipeline with script: ")
L.info(os.path.basename(__file__))
L.info("Running with options: %s", args)
L.info(args)

# this should be an object that contains the full log normalised data (adata_log1p.h5ad)
# prior to hvgs and filtering
#adata = read_anndata(args.input_anndata, use_muon=use_muon, modality=args.modality)
L.info("Reading in MuData from '%s'" % args.input_anndata)
mdata = mu.read(args.input_anndata)
adata = mdata.mod[args.modality]

Expand All @@ -76,29 +74,31 @@
dimred = "X_lsi"

if dimred not in adata.obsm:
L.info("i need a dimred to start, computing pca with default param")
L.warning("Dimred '%s' could not be found in adata.obsm. Computing PCA with default parameters." % dimred)
dimred = "X_pca"
n_pcs = 50
if adata.var.shape[0] < n_pcs:
L.info("You have less features than number of PCs you intend to calculate")
n_pcs = adata.var.shape[0] - 1
L.info("Setting n PCS to %i" % int(n_pcs))
L.info("Setting n PCS to %i" % int(n_pcs))
L.info("Scaling data")
sc.pp.scale(adata)
L.info("Computing PCA")
sc.tl.pca(adata, n_comps=n_pcs,
svd_solver='arpack',
random_state=0)



if len(columns)>1:
L.info("using 2 columns to integrate on more variables")
L.info("Using 2 columns to integrate on more variables")
#comb_columns = "_".join(columns)
adata.obs["comb_columns"] = adata.obs[columns].apply(lambda x: '|'.join(x), axis=1)

# make sure that batch is a categorical
adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category")
# run harmony

L.info("Running Harmony")
ho = hm.run_harmony(adata.obsm[dimred][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"],
sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30,
max_iter_harmony=40)
Expand All @@ -107,6 +107,7 @@
# make sure that batch is a categorical
adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category")
# run harmony
L.info("Running Harmony")
ho = hm.run_harmony(adata.obsm[dimred][:,0:int(args.harmony_npcs)],
adata.obs,
[args.integration_col],
Expand All @@ -116,18 +117,17 @@
max_iter_harmony=40)


L.info("integration run now calculate umap")

L.info("Saving harmony co-ords to .obsm['X_harmony']")
adjusted_pcs = pd.DataFrame(ho.Z_corr).T
adata.obsm['X_harmony']=adjusted_pcs.values
L.info("harmony co-ords derived")

if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]:
L.warn(f"N PCs is larger than X_harmony dimensions, reducing n PCs to {adata.obsm['X_harmony'].shape[1] -1}")

n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1)

# run neighbours and umap
L.info("Computing neighbors")
run_neighbors_method_choice(adata,
method=args.neighbors_method,
n_neighbors=int(args.neighbors_k),
Expand All @@ -137,10 +137,11 @@
nthreads=max([threads_available, 6]))


L.info("done n_neighbours, moving to umap")
L.info("Computing UMAP")
sc.tl.umap(adata)
L.info("done umap, saving stuff")

# write out
L.info("Saving UMAP coordinates to csv file '%s" % args.output_csv)
umap = pd.DataFrame(adata.obsm['X_umap'], adata.obs.index)
umap.to_csv(args.output_csv)

Expand All @@ -149,11 +150,9 @@


outfiletmp = ("tmp/harmony_scaled_adata_" + args.modality + ".h5ad" )

L.info("saving harmony corrected adata")
L.info("Saving AnnData to '%s'" % outfiletmp)
write_anndata(adata, outfiletmp, use_muon=False, modality=args.modality)


L.info("Done")


26 changes: 14 additions & 12 deletions panpipes/python_scripts/batch_correct_mofa.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,30 +68,31 @@
threads_available = multiprocessing.cpu_count()
params = pp.io.read_yaml("pipeline.yml")

L.info("Reading in MuData from '%s'" % args.scaled_anndata)
mdata = mu.read(args.scaled_anndata)

if params['multimodal']['mofa']['modalities'] is not None:
modalities= params['multimodal']['mofa']['modalities']
modalities = [x.strip() for x in modalities.split(",")]
L.info(f"using modalities :{modalities}")
L.info(f"Using modalities :{modalities}")
removed_mods = None
if all(x in modalities for x in mdata.mod.keys()):
tmp = mdata.copy()
L.info('using all modalities')
L.info('Using all modalities')
else:
tmp = mdata.copy()
removed_mods = list(set(mdata.mod.keys()) - set(modalities))
L.info(f"removing modalities {removed_mods}")
L.info(f"Removing modalities {removed_mods}")
for rmod in removed_mods:
del tmp.mod[rmod]
else:
L.warning("""you specified no modalities, so i will default to all available
this may be a problem if you have repertoire in here""")
L.warning("""No modalities were specified, therefore all available modalities will be used.
This may be problematic if repertoire is present as modality.""")
removed_mods = None
tmp = mdata.copy()

print(tmp.mod.keys())

L.info("Intersecting modality obs before running mofa")
mu.pp.intersect_obs(tmp)

mofa_kwargs={}
Expand All @@ -111,7 +112,6 @@
del mofa_kwargs['filter_by_hvg']
for mod in tmp.mod.keys():
if "highly_variable" not in tmp[mod].var.columns:
print(mod)
tmp[mod].var["highly_variable"] = True

tmp.update()
Expand Down Expand Up @@ -148,20 +148,19 @@
if args.use_gpu is True or args.use_gpu=='True':
mofa_kwargs['gpu_mode'] = True

L.info(mofa_kwargs)
L.info(tmp.var.columns)


L.info("Running mofa")
mu.tl.mofa(tmp, **mofa_kwargs)
#This adds X_mofa embeddings to the .obsm slot of the MuData object
#write the discovered latent rep to the original mudata object
L.info("Adding X_mofa to .obsm of MuData")
mdata.obsm['X_mofa'] = tmp.obsm['X_mofa'].copy()

if int(args.neighbors_n_pcs) > mdata.obsm['X_mofa'].shape[1]:
L.warning(f"N PCs is larger than X_mofa dimensions, reducing n PCs to {mdata.obsm['X_mofa'].shape[1]-1}")
n_pcs= min(int(args.neighbors_n_pcs), mdata.obsm['X_mofa'].shape[1]-1)


L.info("Computing neighbors")
run_neighbors_method_choice(mdata,
method=args.neighbors_method,
n_neighbors=int(args.neighbors_k),
Expand All @@ -170,18 +169,21 @@
use_rep='X_mofa',
nthreads=max([threads_available, 6]))

L.info("Computing UMAP")
sc.tl.umap(mdata, min_dist=0.4)
L.info("Computing Leiden clustering")
sc.tl.leiden(mdata, key_added="leiden_mofa")

L.info("Saving UMAP coordinates to csv file '%s" % args.output_csv)
umap = pd.DataFrame(mdata.obsm['X_umap'], mdata.obs.index)
umap.to_csv(args.output_csv)

if removed_mods is not None:
for rmd in removed_mods:
tmp.mod[rmd] = mdata.mod[rmd].copy()

L.info("Saving MuData to 'tmp/mofa_scaled_adata.h5mu'")
tmp.write("tmp/mofa_scaled_adata.h5mu")


L.info("Done")

Loading

0 comments on commit de6ae27

Please sign in to comment.