1
1
from collections import ChainMap
2
2
from collections .abc import Sequence
3
- from typing import Any , Literal , Union
3
+ from typing import Any , Literal
4
4
5
5
import blitzgsea
6
6
import numpy as np
@@ -64,6 +64,7 @@ def score(
64
64
method : Literal ["mean" , "seurat" ] = "mean" ,
65
65
n_bins : int = 25 ,
66
66
ctrl_size : int = 50 ,
67
+ key_added : str = "pertpy_enrichment" ,
67
68
) -> None :
68
69
"""Obtain per-cell scoring of gene groups of interest.
69
70
@@ -88,6 +89,9 @@ def score(
88
89
layer: Specifies which `.layers` of AnnData to use for expression values. Defaults to `.X` if None.
89
90
n_bins: The number of expression bins for the `'seurat'` method.
90
91
ctrl_size: The number of genes to randomly sample from each expression bin for the `"seurat"` method.
92
+ key_added: Prefix of the keys under which the results are stored in `uns`.
93
+ Note that the actual keys are `{key_added}_score`, `{key_added}_variables`, `{key_added}_genes`, and `{key_added}_all_genes`.
94
+ Defaults to `pertpy_enrichment`.
91
95
92
96
Returns:
93
97
None. The scores are stored in `adata.uns` rather than returned.
@@ -144,15 +148,15 @@ def score(
144
148
seurat = np .dot (control_profiles , drug_weights )
145
149
scores = scores - seurat
146
150
147
- adata .uns ["pertpy_enrichment_score " ] = scores
148
- adata .uns ["pertpy_enrichment_variables " ] = weights .columns
151
+ adata .uns [f" { key_added } _score " ] = scores
152
+ adata .uns [f" { key_added } _variables " ] = weights .columns
149
153
150
- adata .uns ["pertpy_enrichment_genes " ] = {"var" : pd .DataFrame (columns = ["genes" ]).astype (object )}
151
- adata .uns ["pertpy_enrichment_all_genes " ] = {"var" : pd .DataFrame (columns = ["all_genes" ]).astype (object )}
154
+ adata .uns [f" { key_added } _genes " ] = {"var" : pd .DataFrame (columns = ["genes" ]).astype (object )}
155
+ adata .uns [f" { key_added } _all_genes " ] = {"var" : pd .DataFrame (columns = ["all_genes" ]).astype (object )}
152
156
153
157
for drug in weights .columns :
154
- adata .uns ["pertpy_enrichment_genes " ]["var" ].loc [drug , "genes" ] = "|" .join (adata .var_names [targets [drug ]])
155
- adata .uns ["pertpy_enrichment_all_genes " ]["var" ].loc [drug , "all_genes" ] = "|" .join (full_targets [drug ])
158
+ adata .uns [f" { key_added } _genes " ]["var" ].loc [drug , "genes" ] = "|" .join (adata .var_names [targets [drug ]])
159
+ adata .uns [f" { key_added } _all_genes " ]["var" ].loc [drug , "all_genes" ] = "|" .join (full_targets [drug ])
156
160
157
161
def hypergeometric (
158
162
self ,
@@ -172,9 +176,11 @@ def hypergeometric(
172
176
If `None`, will use `d2c.score()` output if present, and if not present load the ChEMBL-derived drug target sets distributed with the package.
173
177
Accepts two forms:
174
178
- A dictionary with the names of the groups as keys, and the entries being the corresponding gene lists.
175
- - A dictionary of dictionaries defined like above, with names of gene group categories as keys. If passing one of those, specify `nested=True`.
179
+ - A dictionary of dictionaries defined like above, with names of gene group categories as keys.
180
+ If passing one of those, specify `nested=True`.
176
181
nested: Whether `targets` is a dictionary of dictionaries with group categories as keys.
177
- categories: If `targets=None` or `nested=True`, this argument can be used to subset the gene groups to one or more categories (keys of the original dictionary). In case of the ChEMBL drug targets, these are ATC level 1/level 2 category codes.
182
+ categories: If `targets=None` or `nested=True`, this argument can be used to subset the gene groups to one or more categories (keys of the original dictionary).
183
+ In case of the ChEMBL drug targets, these are ATC level 1/level 2 category codes.
178
184
pvals_adj_thresh: The `pvals_adj` cutoff to use on the `sc.tl.rank_genes_groups()` output to identify markers.
179
185
direction: Whether to seek out up/down-regulated genes for the groups, based on the values from `scores`.
180
186
Can be `up`, `down`, or `both` (for no selection).
@@ -235,6 +241,7 @@ def gsea(
235
241
nested : bool = False ,
236
242
categories : str | list [str ] | None = None ,
237
243
absolute : bool = False ,
244
+ key_added : str = "pertpy_enrichment_gsea" ,
238
245
) -> dict [str , pd .DataFrame ] | tuple [dict [str , pd .DataFrame ], dict [str , dict ]]: # pragma: no cover
239
246
"""Perform gene set enrichment analysis on the marker gene scores using blitzgsea.
240
247
@@ -251,6 +258,8 @@ def gsea(
251
258
applicable if `targets=None` or `nested=True`. Defaults to None.
252
259
absolute: If True, passes the absolute values of scores to GSEA, improving
253
260
statistical power. Defaults to False.
261
+ key_added: Key under which the GSEA scores and targets are stored in `uns` (used as the full key, not as a prefix).
262
+ Defaults to `pertpy_enrichment_gsea`.
254
263
255
264
Returns:
256
265
A dictionary with clusters as keys and data frames of test results sorted on
@@ -272,7 +281,7 @@ def gsea(
272
281
enrichment [cluster ] = blitzgsea .gsea (df , targets )
273
282
plot_gsea_args ["scores" ][cluster ] = df
274
283
275
- adata .uns ["pertpy_enrichment_gsea" ] = plot_gsea_args
284
+ adata .uns [key_added ] = plot_gsea_args
276
285
277
286
return enrichment
278
287
@@ -282,6 +291,7 @@ def plot_dotplot(
282
291
targets : dict [str , list [str ]] | dict [str , dict [str , list [str ]]] = None ,
283
292
categories : Sequence [str ] = None ,
284
293
groupby : str = None ,
294
+ key : str = "pertpy_enrichment" ,
285
295
** kwargs ,
286
296
) -> DotPlot | dict | None :
287
297
"""Plots a dotplot by groupby and categories.
@@ -298,6 +308,8 @@ def plot_dotplot(
298
308
categories: To subset the gene groups to specific categories, especially when `targets=None` or `nested=True`.
299
309
For ChEMBL drug targets, these are ATC level 1/level 2 category codes.
300
310
groupby: dotplot groupby such as clusters or cell types.
311
+ key: Prefix of the `uns` keys that hold the enrichment results (read as `{key}_score` and `{key}_variables`).
312
+ Defaults to `pertpy_enrichment`.
301
313
kwargs: Passed to scanpy dotplot.
302
314
303
315
Returns:
@@ -330,8 +342,8 @@ def plot_dotplot(
330
342
var_group_labels : list [str ] = []
331
343
start = 0
332
344
333
- enrichment_score_adata = AnnData (adata .uns ["pertpy_enrichment_score " ], obs = adata .obs )
334
- enrichment_score_adata .var_names = adata .uns ["pertpy_enrichment_variables " ]
345
+ enrichment_score_adata = AnnData (adata .uns [f" { key } _score " ], obs = adata .obs )
346
+ enrichment_score_adata .var_names = adata .uns [f" { key } _variables " ]
335
347
336
348
for group in targets :
337
349
targets [group ] = list ( # type: ignore
@@ -352,7 +364,9 @@ def plot_dotplot(
352
364
353
365
return sc .pl .dotplot (enrichment_score_adata , groupby = groupby , swap_axes = True , ** plot_args , ** kwargs )
354
366
355
- def plot_gsea (self , adata : AnnData , enrichment : dict [str , pd .DataFrame ], n : int = 10 ) -> None :
367
+ def plot_gsea (
368
+ self , adata : AnnData , enrichment : dict [str , pd .DataFrame ], n : int = 10 , key : str = "pertpy_enrichment_gsea"
369
+ ) -> None :
356
370
"""Generates a blitzgsea top_table plot.
357
371
358
372
This function is designed to visualize the results from a Gene Set Enrichment Analysis (GSEA).
@@ -363,11 +377,12 @@ def plot_gsea(self, adata: AnnData, enrichment: dict[str, pd.DataFrame], n: int
363
377
adata: AnnData object to plot.
364
378
enrichment: Cluster names as keys, blitzgsea's ``gsea()`` output as values.
365
379
n: How many top scores to show for each group. Defaults to 10.
380
+ key: Key in `uns` under which the GSEA scores and targets are stored. Defaults to `pertpy_enrichment_gsea`.
366
381
"""
367
382
for cluster in enrichment :
368
383
fig = blitzgsea .plot .top_table (
369
- adata .uns ["pertpy_enrichment_gsea" ]["scores" ][cluster ],
370
- adata .uns ["pertpy_enrichment_gsea" ]["targets" ],
384
+ adata .uns [key ]["scores" ][cluster ],
385
+ adata .uns [key ]["targets" ],
371
386
enrichment [cluster ],
372
387
n = n ,
373
388
)
0 commit comments