Skip to content

Commit

Permalink
add log info
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahOuologuem committed Apr 16, 2024
1 parent 7b5ad47 commit e3de2cf
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 81 deletions.
7 changes: 3 additions & 4 deletions panpipes/R_scripts/plotclustree.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,14 @@ opt <- parse_args(OptionParser(option_list=option_list))
# 2. name columns
# 3. run clustree

message("Running with options:")

print(opt)

# # run clustree
m = readr::read_tsv(opt$infile)
# this is a little dodge, but works ;)
example_column=colnames(m)[2]
col_prefix=substr(example_column, 1, nchar(example_column)-3 )
# run clustree
print("Running Clustree")
gg <- clustree(m, prefix =col_prefix) + ggtitle(opt$plot_title)


Expand All @@ -42,6 +40,7 @@ if (!(dir.exists(dirname(opt$outfile)))){
}

# save
print("Saving Clustree")
ggsave(gg, filename=opt$outfile, height=10,width=12, type="cairo")

message("clustree done")
print("Done")
6 changes: 5 additions & 1 deletion panpipes/python_scripts/aggregate_csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
log_handler.setFormatter(formatter)
L.addHandler(log_handler)
L.debug("test logging message")


# parse arguments
parser = argparse.ArgumentParser()
Expand All @@ -36,17 +36,20 @@

infiles = re.split(',', args.input_files_str)
if args.clusters_or_markers == "clusters":
L.info("Aggregating cluster columns")
combined_csv = pd.concat([pd.read_csv(f, sep='\t', index_col=0) for f in infiles], axis=1)
# get colnames
cnames = []
for f in infiles:
alg = extract_parameter_from_fname(f, 'alg', prefix=args.sample_prefix)
res = extract_parameter_from_fname(f, 'res', prefix=args.sample_prefix)
cnames.append(alg + '_res_' + str(res))
L.info("Saving combined cluster columns to tsv file '%s'" % args.output_file)
combined_csv.to_csv(args.output_file, sep='\t', header=cnames, index=True)


if args.clusters_or_markers == "markers":
L.info("Aggregating marker files")
li = []
all_markers_file = re.sub("_top", "_all", args.output_file)
excel_file = re.sub("_top.txt.gz", "_all.xlsx", args.output_file)
Expand All @@ -69,6 +72,7 @@
frame.to_csv(all_markers_file, sep='\t', header=True, index=False)
frame_sub = frame[frame['p.adj.bonferroni'] < 0.05]
frame_sub = frame_sub[frame_sub['avg_logFC'] > 0]
L.info("Saving combined marker files to tsv file '%s'" % args.output_file)
frame_sub.to_csv(args.output_file, sep='\t', header=True, index=False)


Expand Down
25 changes: 17 additions & 8 deletions panpipes/python_scripts/collate_mdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@
default="mdata.h5mu",
help="file name, format: .h5mu")
args, opt = parser.parse_known_args()

L.info(args)

L.info("Reading in MuData from '%s'" % args.input_mudata)
mdata = mu.read(args.input_mudata)

L.info("loading clusters")
L.info("Reading in cluster information")
cf = pd.read_csv(args.clusters_files_csv)

if isinstance(mdata, MuData):
Expand All @@ -44,7 +47,7 @@
if len(mds)>1:
sys.exit("You have clustered multiple modalities but are providing only a unimodal anndata")
else:
L.warn("found one modality, converting to mudata: %s " % mds[0] )
L.warn("Found one modality, converting to mudata: %s " % mds[0] )
tmp = MuData({mds[0]:mdata})
del mdata
mdata = tmp
Expand All @@ -53,7 +56,7 @@
# add in the clusters



L.info("Adding cluster information to MuData")
for i in range(cf.shape[0]):
cf_df = pd.read_csv(cf['fpath'][i], sep='\t', index_col=0)
cf_df['clusters'] = cf_df['clusters'].astype('str').astype('category')
Expand All @@ -64,8 +67,9 @@
else:
mdata.obs = mdata.obs.merge(cf_df, left_index=True, right_index=True)

uf = pd.read_csv(args.umap_files_csv)

L.info("Adding UMAP coordinates to MuData")
uf = pd.read_csv(args.umap_files_csv)

for i in range(uf.shape[0]):
uf_df = pd.read_csv(uf['fpath'][i], sep='\t', index_col=0)
Expand All @@ -75,17 +79,22 @@
if all(mdata[mod].obs_names == uf_df.index):
mdata[mod].obsm[new_key] = uf_df.to_numpy()
else:
L.warn("cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
L.warn("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
else:
# check the observations are the same
if set(mdata.obs_names).difference(uf_df.index) == set():
# put the observations in the same order
uf_df = uf_df.loc[mdata.obs_names,:]
mdata.obsm[new_key] = uf_df.to_numpy()
else:
L.warning("cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )
L.warning("Cannot integrate %s into mdata as obs_names mismatch" % uf.iloc[i,:] )


L.info("Saving updated MuData to '%s'" % args.output_mudata)
mdata.write(args.output_mudata)
mdata.obs.to_csv(re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata), sep='\t')

L.info("done")
output_csv = re.sub(".h5mu", "_cell_metdata.tsv", args.output_mudata)
L.info("Saving metadata to '%s'" % output_csv)
mdata.obs.to_csv(output_csv, sep='\t')

L.info("Done")
25 changes: 14 additions & 11 deletions panpipes/python_scripts/plot_cluster_umaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
help="list of modalities to search for UMAPs in")
args, opt = parser.parse_known_args()

L.info("running with:")
L.info(args)
# args = argparse.Namespace(infile='mdata_clustered.h5mu', figdir=None)
# ---------
Expand All @@ -42,11 +41,11 @@ def main(adata,figdir):
# get all possible umap coords
pattern="X_umap(.*)"
obsm_keys = [x for x in adata.obsm.keys() if re.search(pattern, x)]
L.info("Umap keys founds %s" % obsm_keys)
# get all possible clustersclusters
L.info("UMAP keys found: %s" % obsm_keys)
# get all possible clusters
pattern=re.compile(r'^leiden|^louvain')
cluster_keys = [x for x in adata.obs.columns if re.search(pattern, x)]
L.info("Cluster keys founds %s" % cluster_keys)
L.info("Cluster keys found: %s" % cluster_keys)
if len(obsm_keys) == 0 or len(cluster_keys) == 0:
return

Expand All @@ -55,22 +54,24 @@ def main(adata,figdir):
adata.obs[ck] = adata.obs[ck].astype('category')
# plot all the umaps
for ok in obsm_keys:
L.info("Plotting UMAP on %s coloured by %s" % (ok, cluster_keys))
fig = sc.pl.embedding(adata, basis = ok,color=cluster_keys,
show=False, return_fig=True, legend_loc='on data')
for ax in fig.axes:
ax.set(xlabel="UMAP_1", ylabel="UMAP_2")
fig.suptitle(ok, y=1.0)
L.info("Saving figure to '%s'" % os.path.join(figdir, ok + "_clusters.png"))
fig.savefig(os.path.join(figdir, ok + "_clusters.png"))

def plot_spatial(adata,figdir):
# get all possible umap coords
pattern="spatial(.*)"
obsm_keys = [x for x in adata.obsm.keys() if re.search(pattern, x)]
L.info("Umap keys founds %s" % obsm_keys)
L.info("UMAP keys found: %s" % obsm_keys)
# get all possible clustersclusters
pattern=re.compile(r'^leiden|^louvain')
cluster_keys = [x for x in adata.obs.columns if re.search(pattern, x)]
L.info("Cluster keys founds %s" % cluster_keys)
L.info("Cluster keys found: %s" % cluster_keys)
if len(obsm_keys) == 0 or len(cluster_keys) == 0:
return

Expand All @@ -79,15 +80,18 @@ def plot_spatial(adata,figdir):
adata.obs[ck] = adata.obs[ck].astype('category')
# plot all the umaps
for ok in obsm_keys:
L.info("Plotting UMAP on %s coloured by %s" % (ok, cluster_keys))
fig = sc.pl.embedding(adata, basis = ok,color=cluster_keys,
show=False, return_fig=True, legend_loc='on data')
for ax in fig.axes:
ax.set(xlabel="spatial1", ylabel="spatial2")
fig.suptitle(ok, y=1.0)
L.info("Saving figure to '%s'" % os.path.join(figdir, ok + "_clusters.png"))
fig.savefig(os.path.join(figdir, ok + "_clusters.png"))


L.debug("load data")

L.info("Reading in MuData from '%s'" % args.infile)
mdata = read(args.infile)

mods = args.modalities.split(',')
Expand All @@ -97,14 +101,15 @@ def plot_spatial(adata,figdir):
if 'multimodal' in mods:
if os.path.exists("multimodal/figures") is False:
os.makedirs("multimodal/figures")
L.info("Plotting multimodal figures")
main(mdata, figdir="multimodal/figures")


# we also need to plot per modality
if type(mdata) is MuData:
for mod in mdata.mod.keys():
if mod in mods:
L.info("plotting for modality: %s" % mod)
L.info("Plotting for modality: %s" % mod)
figdir = os.path.join(mod, "figures")
if os.path.exists(figdir) is False:
os.makedirs(figdir)
Expand All @@ -115,6 +120,4 @@ def plot_spatial(adata,figdir):





L.info('done')
L.info('Done')
16 changes: 11 additions & 5 deletions panpipes/python_scripts/plot_scanpy_markers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,17 @@ def calc_dendrogram(adata, group_col):


def do_plots(adata, mod, group_col, mf, n=10, layer=None):
L.debug("check layers")
# get markers for plotting
L.info("Subsetting on markers with avg logFC > 0")
mf = mf[mf['avg_logFC'] > 0]
L.info("Extracting top markers for each cluster")
mf['scores'] = pd.to_numeric(mf["scores"])
df = mf.groupby(group_col).apply(lambda x: x.nlargest(n, ['scores'])).reset_index(drop=True)
marker_list={str(k): list(v) for k,v in df.groupby(group_col)["gene"]}
# add cluseter col to obs
# check whether a dendrogram is computed/
incl_dendrogram = calc_dendrogram(adata, group_col)
L.info("start plotting")
L.info("Plotting stacked violin")
sc.pl.stacked_violin(adata,
marker_list,
groupby=group_col,
Expand All @@ -88,18 +90,21 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):
dendrogram=incl_dendrogram,
# figsize=(24, 5)
)
L.info("Plotting matrix plot")
sc.pl.matrixplot(adata,
marker_list,
groupby=group_col,
save= '_top_markers'+ mod +'.png',
dendrogram=incl_dendrogram,
figsize=(24, 5))
L.info("Plotting dotplot")
sc.pl.dotplot(adata,
marker_list,
groupby=group_col,
save= '_top_markers'+ mod +'.png',
dendrogram=incl_dendrogram,
figsize=(24, 5))
L.info("Plotting heatmap")
sc.pl.heatmap(adata,
marker_list,
groupby=group_col,
Expand All @@ -110,6 +115,7 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):


# read data
L.info("Reading in MuData from '%s'" % args.infile)
mdata = mu.read(args.infile)

if type(mdata) is AnnData:
Expand All @@ -118,10 +124,10 @@ def do_plots(adata, mod, group_col, mf, n=10, layer=None):
elif type(mdata) is mu.MuData and args.modality is not None:
adata = mdata[args.modality]
else:
sys.exit('if inputting a mudata object, you need to specify a modality')

L.error("If the input is a MuData object, a modality needs to be specified")
sys.exit('If the input is a MuData object, a modality needs to be specified')

L.info("load marker file")
L.info("Loading marker information from '%s'" % args.marker_file)
mf = pd.read_csv(args.marker_file, sep='\t' )
mf[args.group_col] = mf['cluster'].astype('category')

Expand Down
65 changes: 34 additions & 31 deletions panpipes/python_scripts/rerun_find_neighbors_for_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,41 +43,44 @@


for mod in neighbor_dict.keys():
if neighbor_dict[mod]['use_existing']:
L.info('Using existing neighbors graph for %s' % mod)
pass
else:
L.info("Computing new neighbors for %s" % mod)
if type(mdata) is MuData:
adata=mdata[mod]
if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()):
L.info("X_pca not found, computing it using default parameters")
sc.tl.pca(adata)
if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None):
dimrem = int(neighbor_dict[mod]['dim_remove'])
adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:]
adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:]
if mod == "atac":
if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()):
L.info("X_lsi not found, computing it using default parameters")
lsi(adata=adata, num_components=50)
if neighbor_dict[mod]['dim_remove'] is not None:
if mod in mdata.mod.keys():
if neighbor_dict[mod]['use_existing']:
L.info('Using existing neighbors graph for %s' % mod)
pass
else:
L.info("Computing new neighbors for modality %s on %s" % (mod, neighbor_dict[mod]['dim_red']))
if type(mdata) is MuData:
adata=mdata[mod]
if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()):
L.info("X_pca not found, computing it using default parameters")
sc.tl.pca(adata)
if (mod == "atac") and (neighbor_dict[mod]['dim_remove'] is not None):
dimrem = int(neighbor_dict[mod]['dim_remove'])
adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:]
adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:]
adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:]
adata.obsm['X_pca'] = adata.obsm['X_pca'][:, dimrem:]
adata.varm["PCs"] = adata.varm["PCs"][:, dimrem:]
if mod == "atac":
if (neighbor_dict[mod]['dim_red'] == "X_lsi") and ("X_lsi" not in adata.obsm.keys()):
L.info("X_lsi not found, computing it using default parameters")
lsi(adata=adata, num_components=50)
if neighbor_dict[mod]['dim_remove'] is not None:
L.info("Removing dimension %s from X_lsi" % neighbor_dict[mod]['dim_remove'])
dimrem = int(neighbor_dict[mod]['dim_remove'])
adata.obsm['X_lsi'] = adata.obsm['X_lsi'][:, dimrem:]
adata.varm["LSI"] = adata.varm["LSI"][:, dimrem:]
adata.uns["lsi"]["stdev"] = adata.uns["lsi"]["stdev"][dimrem:]

# run command
opts = dict(method=neighbor_dict[mod]['method'],
n_neighbors=int(neighbor_dict[mod]['k']),
n_pcs=int(neighbor_dict[mod]['n_dim_red']),
metric=neighbor_dict[mod]['metric'],
nthreads=args.n_threads,
use_rep=neighbor_dict[mod]['dim_red'])
# run command
opts = dict(method=neighbor_dict[mod]['method'],
n_neighbors=int(neighbor_dict[mod]['k']),
n_pcs=int(neighbor_dict[mod]['n_dim_red']),
metric=neighbor_dict[mod]['metric'],
nthreads=args.n_threads,
use_rep=neighbor_dict[mod]['dim_red'])


run_neighbors_method_choice(adata,**opts)
mdata.update()
run_neighbors_method_choice(adata,**opts)
mdata.mod[mod] = adata
mdata.update()



Expand Down
6 changes: 3 additions & 3 deletions panpipes/python_scripts/run_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@
# check sc.pp.neihgbours has been run
if uns_key not in adata.uns.keys():
# sys.exit("Error: sc.pp.neighbours has not been run on this object")
L.warning("Running neighbors with default parameters since no neighbors graph found in this data object")
L.warning("Running neighbors for modality %s with default parameters since no neighbors graph found in this data object" % args.modality)
sc.pp.neighbors(adata)
uns_key="neighbors"


# run command
if args.algorithm == "louvain":
L.info("Running Louvain clustering")
L.info("Running Louvain clustering for modality %s and resolution %s on %s", (args.modality, args.resolution, uns_key))
sc.tl.louvain(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key)
elif args.algorithm == "leiden":
L.info("Running Leiden clustering")
L.info("Running Leiden clustering for modality %s and resolution %s on %s", (args.modality, args.resolution, uns_key))
sc.tl.leiden(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key)
else:
L.error("Could not find clustering algorithm '%s'. Please specify 'louvain' or 'leiden'" % args.algorithm)
Expand Down
Loading

0 comments on commit e3de2cf

Please sign in to comment.