Skip to content

Commit e3de2cf

Browse files
author
SarahOuologuem
committed
add log info
1 parent 7b5ad47 commit e3de2cf

File tree

9 files changed

+114
-81
lines changed

9 files changed

+114
-81
lines changed

panpipes/R_scripts/plotclustree.R

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,14 @@ opt <- parse_args(OptionParser(option_list=option_list))
2424
# 2. name columns
2525
# 3. run clustree
2626

27-
message("Running with options:")
28-
29-
print(opt)
3027

3128
# # run clustree
3229
m = readr::read_tsv(opt$infile)
3330
# this is a little dodgy, but works ;)
3431
example_column=colnames(m)[2]
3532
col_prefix=substr(example_column, 1, nchar(example_column)-3 )
3633
# run clustree
34+
print("Running Clustree")
3735
gg <- clustree(m, prefix =col_prefix) + ggtitle(opt$plot_title)
3836

3937

@@ -42,6 +40,7 @@ if (!(dir.exists(dirname(opt$outfile)))){
4240
}
4341

4442
# save
43+
print("Saving Clustree")
4544
ggsave(gg, filename=opt$outfile, height=10,width=12, type="cairo")
4645

47-
message("clustree done")
46+
print("Done")

panpipes/python_scripts/aggregate_csvs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
1414
log_handler.setFormatter(formatter)
1515
L.addHandler(log_handler)
16-
L.debug("test logging message")
16+
1717

1818
# parse arguments
1919
parser = argparse.ArgumentParser()
@@ -36,17 +36,20 @@
3636

3737
infiles = re.split(',', args.input_files_str)
3838
if args.clusters_or_markers == "clusters":
39+
L.info("Aggregating cluster columns")
3940
combined_csv = pd.concat([pd.read_csv(f, sep='\t', index_col=0) for f in infiles], axis=1)
4041
# get colnames
4142
cnames = []
4243
for f in infiles:
4344
alg = extract_parameter_from_fname(f, 'alg', prefix=args.sample_prefix)
4445
res = extract_parameter_from_fname(f, 'res', prefix=args.sample_prefix)
4546
cnames.append(alg + '_res_' + str(res))
47+
L.info("Saving combined cluster columns to tsv file '%s'" % args.output_file)
4648
combined_csv.to_csv(args.output_file, sep='\t', header=cnames, index=True)
4749

4850

4951
if args.clusters_or_markers == "markers":
52+
L.info("Aggregating marker files")
5053
li = []
5154
all_markers_file = re.sub("_top", "_all", args.output_file)
5255
excel_file = re.sub("_top.txt.gz", "_all.xlsx", args.output_file)
@@ -69,6 +72,7 @@
6972
frame.to_csv(all_markers_file, sep='\t', header=True, index=False)
7073
frame_sub = frame[frame['p.adj.bonferroni'] < 0.05]
7174
frame_sub = frame_sub[frame_sub['avg_logFC'] > 0]
75+
L.info("Saving combined marker files to tsv file '%s'" % args.output_file)
7276
frame_sub.to_csv(args.output_file, sep='\t', header=True, index=False)
7377

7478

panpipes/python_scripts/collate_mdata.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,13 @@
3131
default="mdata.h5mu",
3232
help="file name, format: .h5mu")
3333
args, opt = parser.parse_known_args()
34+
3435
L.info(args)
36+
37+
L.info("Reading in MuData from '%s'" % args.input_mudata)
3538
mdata = mu.read(args.input_mudata)
3639

37-
L.info("loading clusters")
40+
L.info("Reading in cluster information")
3841
cf = pd.read_csv(args.clusters_files_csv)
3942

4043
if isinstance(mdata, MuData):
@@ -44,7 +47,7 @@
4447
if len(mds)>1:
4548
sys.exit("You have clustered multiple modalities but are providing only a unimodal anndata")
4649
else:
47-
L.warn("found one modality, converting to mudata: %s " % mds[0] )
50+
L.warn("Found one modality, converting to mudata: %s " % mds[0] )
4851
tmp = MuData({mds[0]:mdata})
4952
del mdata
5053
mdata = tmp
@@ -53,7 +56,7 @@
5356
# add in the clusters
5457

5558

56-
59+
L.info("Adding cluster information to MuData")
5760
for i in range(cf.shape[0]):
5861
cf_df = pd.read_csv(cf['fpath'][i], sep='\t', index_col=0)
5962
cf_df['clusters'] = cf_df['clusters'].astype('str').astype('category')
@@ -64,8 +67,9 @@
6467
else:
6568
mdata.obs = mdata.obs.merge(cf_df, left_index=True, right_index=True)
6669

67-
uf = pd.read_csv(args.umap_files_csv)
6870

71+
L.info("Adding UMAP coordinates to MuData")
72+
uf = pd.read_csv(args.umap_files_csv)
6973

7074
for i in range(uf.shape[0]):
7175
uf_df = pd.read_csv(uf['fpath'][i], sep='\t', index_col=0)
@@ -75,17 +79,22 @@
7579
if all(mdata[mod].obs_names == uf_df.index):
7680
mdata[mod].obsm[new_key] = uf_df.to_numpy()
7781
else:
78-
L.warn("cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
82+
L.warn("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
7983
else:
8084
# check the observations are the same
8185
if set(mdata.obs_names).difference(uf_df.index) == set():
8286
# put the observations in the same order
8387
uf_df = uf_df.loc[mdata.obs_names,:]
8488
mdata.obsm[new_key] = uf_df.to_numpy()
8589
else:
86-
L.warning("cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
90+
L.warning("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
91+
8792

93+
L.info("Saving updated MuData to '%s'" % args.output_mudata)
8894
mdata.write(args.output_mudata)
89-
mdata.obs.to_csv(re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata), sep='\t')
9095

91-
L.info("done")
96+
output_csv = re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata)
97+
L.info("Saving metadata to '%s'" % output_csv)
98+
mdata.obs.to_csv(output_csv, sep='\t')
99+
100+
L.info("Done")

panpipes/python_scripts/plot_cluster_umaps.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
help="list of modalities to search for UMAPs in")
3434
args, opt = parser.parse_known_args()
3535

36-
L.info("running with:")
3736
L.info(args)
3837
# args = argparse.Namespace(infile='mdata_clustered.h5mu', figdir=None)
3938
# ---------
@@ -42,11 +41,11 @@ def main(adata,figdir):
4241
# get all possible umap coords
4342
pattern="X_umap(.*)"
4443
obsm_keys = [x for x in adata.obsm.keys() if re.search(pattern, x)]
45-
L.info("Umap keys founds %s" % obsm_keys)
46-
# get all possible clustersclusters
44+
L.info("UMAP keys found: %s" % obsm_keys)
45+
# get all possible clusters
4746
pattern=re.compile(r'^leiden|^louvain')
4847
cluster_keys = [x for x in adata.obs.columns if re.search(pattern, x)]
49-
L.info("Cluster keys founds %s" % cluster_keys)
48+
L.info("Cluster keys found: %s" % cluster_keys)
5049
if len(obsm_keys) == 0 or len(cluster_keys) == 0:
5150
return
5251

@@ -55,22 +54,24 @@ def main(adata,figdir):
5554
adata.obs[ck] = adata.obs[ck].astype('category')
5655
# plot all the umaps
5756
for ok in obsm_keys:
57+
L.info("Plotting UMAP on %s coloured by %s" % (ok, cluster_keys))
5858
fig = sc.pl.embedding(adata, basis = ok,color=cluster_keys,
5959
show=False, return_fig=True, legend_loc='on data')
6060
for ax in fig.axes:
6161
ax.set(xlabel="UMAP_1", ylabel="UMAP_2")
6262
fig.suptitle(ok, y=1.0)
63+
L.info("Saving figure to '%s'" % os.path.join(figdir, ok + "_clusters.png"))
6364
fig.savefig(os.path.join(figdir, ok + "_clusters.png"))
6465

6566
def plot_spatial(adata,figdir):
6667
# get all possible spatial coords
6768
pattern="spatial(.*)"
6869
obsm_keys = [x for x in adata.obsm.keys() if re.search(pattern, x)]
69-
L.info("Umap keys founds %s" % obsm_keys)
70+
L.info("UMAP keys found: %s" % obsm_keys)
7071
# get all possible clusters
7172
pattern=re.compile(r'^leiden|^louvain')
7273
cluster_keys = [x for x in adata.obs.columns if re.search(pattern, x)]
73-
L.info("Cluster keys founds %s" % cluster_keys)
74+
L.info("Cluster keys found: %s" % cluster_keys)
7475
if len(obsm_keys) == 0 or len(cluster_keys) == 0:
7576
return
7677

@@ -79,15 +80,18 @@ def plot_spatial(adata,figdir):
7980
adata.obs[ck] = adata.obs[ck].astype('category')
8081
# plot all the umaps
8182
for ok in obsm_keys:
83+
L.info("Plotting UMAP on %s coloured by %s" % (ok, cluster_keys))
8284
fig = sc.pl.embedding(adata, basis = ok,color=cluster_keys,
8385
show=False, return_fig=True, legend_loc='on data')
8486
for ax in fig.axes:
8587
ax.set(xlabel="spatial1", ylabel="spatial2")
8688
fig.suptitle(ok, y=1.0)
89+
L.info("Saving figure to '%s'" % os.path.join(figdir, ok + "_clusters.png"))
8790
fig.savefig(os.path.join(figdir, ok + "_clusters.png"))
8891

8992

90-
L.debug("load data")
93+
94+
L.info("Reading in MuData from '%s'" % args.infile)
9195
mdata = read(args.infile)
9296

9397
mods = args.modalities.split(',')
@@ -97,14 +101,15 @@ def plot_spatial(adata,figdir):
97101
if 'multimodal' in mods:
98102
if os.path.exists("multimodal/figures") is False:
99103
os.makedirs("multimodal/figures")
104+
L.info("Plotting multimodal figures")
100105
main(mdata, figdir="multimodal/figures")
101106

102107

103108
# we also need to plot per modality
104109
if type(mdata) is MuData:
105110
for mod in mdata.mod.keys():
106111
if mod in mods:
107-
L.info("plotting for modality: %s" % mod)
112+
L.info("Plotting for modality: %s" % mod)
108113
figdir = os.path.join(mod, "figures")
109114
if os.path.exists(figdir) is False:
110115
os.makedirs(figdir)
@@ -115,6 +120,4 @@ def plot_spatial(adata,figdir):
115120

116121

117122

118-
119-
120-
L.info('done')
123+
L.info('Done')

panpipes/python_scripts/plot_scanpy_markers.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,17 @@ def calc_dendrogram(adata, group_col):
7171

7272

7373
def do_plots(adata, mod, group_col, mf, n=10, layer=None):
74-
L.debug("check layers")
7574
# get markers for plotting
75+
L.info("Subsetting on markers with avg logFC > 0")
7676
mf = mf[mf['avg_logFC'] > 0]
77+
L.info("Extracting top markers for each cluster")
78+
mf['scores'] = pd.to_numeric(mf["scores"])
7779
df = mf.groupby(group_col).apply(lambda x: x.nlargest(n, ['scores'])).reset_index(drop=True)
7880
marker_list={str(k): list(v) for k,v in df.groupby(group_col)["gene"]}
7981
# add cluster col to obs
8082
# check whether a dendrogram is computed/
8183
incl_dendrogram = calc_dendrogram(adata, group_col)
82-
L.info("start plotting")
84+
L.info("Plotting stacked violin")
8385
sc.pl.stacked_violin(adata,
8486
marker_list,
8587
groupby=group_col,
@@ -88,18 +90,21 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):
8890
dendrogram=incl_dendrogram,
8991
# figsize=(24, 5)
9092
)
93+
L.info("Plotting matrix plot")
9194
sc.pl.matrixplot(adata,
9295
marker_list,
9396
groupby=group_col,
9497
save= '_top_markers'+ mod +'.png',
9598
dendrogram=incl_dendrogram,
9699
figsize=(24, 5))
100+
L.info("Plotting dotplot")
97101
sc.pl.dotplot(adata,
98102
marker_list,
99103
groupby=group_col,
100104
save= '_top_markers'+ mod +'.png',
101105
dendrogram=incl_dendrogram,
102106
figsize=(24, 5))
107+
L.info("Plotting heatmap")
103108
sc.pl.heatmap(adata,
104109
marker_list,
105110
groupby=group_col,
@@ -110,6 +115,7 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):
110115

111116

112117
# read data
118+
L.info("Reading in MuData from '%s'" % args.infile)
113119
mdata = mu.read(args.infile)
114120

115121
if type(mdata) is AnnData:
@@ -118,10 +124,10 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):
118124
elif type(mdata) is mu.MuData and args.modality is not None:
119125
adata = mdata[args.modality]
120126
else:
121-
sys.exit('if inputting a mudata object, you need to specify a modality')
122-
127+
L.error("If the input is a MuData object, a modality needs to be specified")
128+
sys.exit('If the input is a MuData object, a modality needs to be specified')
123129

124-
L.info("load marker file")
130+
L.info("Loading marker information from '%s'" % args.marker_file)
125131
mf = pd.read_csv(args.marker_file, sep='\t' )
126132
mf[args.group_col] = mf['cluster'].astype('category')
127133

panpipes/python_scripts/rerun_find_neighbors_for_clustering.py

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -43,41 +43,44 @@
4343

4444

4545
for mod in neighbor_dict.keys():
46-
if neighbor_dict[mod]['use_existing']:
47-
L.info('Using existing neighbors graph for %s' % mod)
48-
pass
49-
else:
50-
L.info("Computing new neighbors for %s" % mod)
51-
if type(mdata) is MuData:
52-
adata=mdata[mod]
53-
if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()):
54-
L.info("X_pca not found, computing it using default parameters")
55-
sc.tl.pca(adata)
56-
if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None):
57-
dimrem = int(neighbor_dict[mod]['dim_remove'])
58-
adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:]
59-
adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:]
60-
if mod == "atac":
61-
if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()):
62-
L.info("X_lsi not found, computing it using default parameters")
63-
lsi(adata=adata, num_components=50)
64-
if neighbor_dict[mod]['dim_remove'] is not None:
46+
if mod in mdata.mod.keys():
47+
if neighbor_dict[mod]['use_existing']:
48+
L.info('Using existing neighbors graph for %s' % mod)
49+
pass
50+
else:
51+
L.info("Computing new neighbors for modality %s on %s" % (mod, neighbor_dict[mod]['dim_red']))
52+
if type(mdata) is MuData:
53+
adata=mdata[mod]
54+
if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()):
55+
L.info("X_pca not found, computing it using default parameters")
56+
sc.tl.pca(adata)
57+
if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None):
6558
dimrem = int(neighbor_dict[mod]['dim_remove'])
66-
adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:]
67-
adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:]
68-
adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:]
59+
adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:]
60+
adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:]
61+
if mod == "atac":
62+
if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()):
63+
L.info("X_lsi not found, computing it using default parameters")
64+
lsi(adata=adata, num_components=50)
65+
if neighbor_dict[mod]['dim_remove'] is not None:
66+
L.info("Removing dimension %s from X_lsi" % neighbor_dict[mod]['dim_remove'])
67+
dimrem = int(neighbor_dict[mod]['dim_remove'])
68+
adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:]
69+
adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:]
70+
adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:]
6971

70-
# run command
71-
opts = dict(method=neighbor_dict[mod]['method'],
72-
n_neighbors=int(neighbor_dict[mod]['k']),
73-
n_pcs=int(neighbor_dict[mod]['n_dim_red']),
74-
metric=neighbor_dict[mod]['metric'],
75-
nthreads=args.n_threads,
76-
use_rep=neighbor_dict[mod]['dim_red'])
72+
# run command
73+
opts = dict(method=neighbor_dict[mod]['method'],
74+
n_neighbors=int(neighbor_dict[mod]['k']),
75+
n_pcs=int(neighbor_dict[mod]['n_dim_red']),
76+
metric=neighbor_dict[mod]['metric'],
77+
nthreads=args.n_threads,
78+
use_rep=neighbor_dict[mod]['dim_red'])
7779

7880

79-
run_neighbors_method_choice(adata,**opts)
80-
mdata.update()
81+
run_neighbors_method_choice(adata,**opts)
82+
mdata.mod[mod] = adata
83+
mdata.update()
8184

8285

8386

panpipes/python_scripts/run_clustering.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,17 @@
4646
# check sc.pp.neighbors has been run
4747
if uns_key not in adata.uns.keys():
4848
# sys.exit("Error: sc.pp.neighbours has not been run on this object")
49-
L.warning("Running neighbors with default parameters since no neighbors graph found in this data object")
49+
L.warning("Running neighbors for modality %s with default parameters since no neighbors graph found in this data object" % args.modality)
5050
sc.pp.neighbors(adata)
5151
uns_key="neighbors"
5252

5353

5454
# run command
5555
if args.algorithm == "louvain":
56-
L.info("Running Louvain clustering")
56+
L.info("Running Louvain clustering for modality %s and resolution %s on %s", (args.modality, args.resolution, uns_key))
5757
sc.tl.louvain(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key)
5858
elif args.algorithm == "leiden":
59-
L.info("Running Leiden clustering")
59+
L.info("Running Leiden clustering for modality %s and resolution %s on %s", (args.modality, args.resolution, uns_key))
6060
sc.tl.leiden(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key)
6161
else:
6262
L.error("Could not find clustering algorithm '%s'. Please specify 'louvain' or 'leiden'" % args.algorithm)

0 commit comments

Comments
 (0)