@@ -39,6 +39,7 @@ def __init__(self):
39
39
self .proteomics = None
40
40
self .drug_response_gdsc1 = None
41
41
self .drug_response_gdsc2 = None
42
+ self .drug_response_prism = None
42
43
43
44
def _download_cell_line (self , cell_line_source : Literal ["DepMap" , "Cancerrxgene" ] = "DepMap" ) -> None :
44
45
if cell_line_source == "DepMap" :
@@ -157,7 +158,7 @@ def _download_proteomics(self) -> None:
157
158
def _download_gdsc (self , gdsc_dataset : Literal [1 , 2 ] = 1 ) -> None :
158
159
if gdsc_dataset == 1 :
159
160
# Download GDSC drug response data
160
- # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s)
161
+ # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s and AUC )
161
162
# URL: https://cog.sanger.ac.uk/cancerrxgene/GDSC_release8.4/GDSC1_fitted_dose_response_24Jul22.xlsx
162
163
drug_response_gdsc1_file_path = Path (settings .cachedir ) / "gdsc1_info.csv"
163
164
if not Path (drug_response_gdsc1_file_path ).exists ():
@@ -181,6 +182,23 @@ def _download_gdsc(self, gdsc_dataset: Literal[1, 2] = 1) -> None:
181
182
)
182
183
self .drug_response_gdsc2 = pd .read_csv (drug_response_gdsc2_file_path , index_col = 0 )
183
184
185
def _download_prism(self) -> None:
    """Load the PRISM drug response table into ``self.drug_response_prism``.

    Source: DepMap PRISM Repurposing 19Q4 secondary screen dose response curve parameters.
    The CSV is downloaded into ``settings.cachedir`` only when no cached copy exists.

    Rows lacking a ``depmap_id`` or a ``name`` are discarded, and the remaining
    ``ic50``, ``ec50`` and ``auc`` values are averaged per (``depmap_id``, ``name``) pair.
    """
    prism_file_name = "prism_info.csv"
    prism_cache_path = Path(settings.cachedir) / prism_file_name

    # Only hit the network when the file has not been cached yet
    if not prism_cache_path.exists():
        _download(
            url="https://figshare.com/ndownloader/files/20237739",
            output_file_name=prism_file_name,
            output_path=settings.cachedir,
            block_size=4096,
            is_zip=False,
        )

    key_columns = ["depmap_id", "name"]
    response_columns = ["ic50", "ec50", "auc"]

    prism_table = pd.read_csv(prism_cache_path, index_col=0)[key_columns + response_columns]
    prism_table = prism_table.dropna(subset=key_columns)
    # Collapse repeated measurements of the same cell line / drug pair to their mean
    self.drug_response_prism = prism_table.groupby(key_columns).mean().reset_index()
201
+
184
202
def annotate (
185
203
self ,
186
204
adata : AnnData ,
@@ -197,13 +215,13 @@ def annotate(
197
215
198
216
Args:
199
217
adata: The data object to annotate.
200
- query_id: The column of `.obs` with cell line information.
218
+ query_id: The column of ``.obs`` with cell line information.
201
219
reference_id: The type of cell line identifier in the metadata, e.g. ModelID, CellLineName or StrippedCellLineName.
202
220
If fetching cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
203
221
fetch: The metadata to fetch.
204
222
cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene.
205
223
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
206
- copy: Determines whether a copy of the ` adata` is returned.
224
+ copy: Determines whether a copy of ``adata`` is returned.
207
225
208
226
Returns:
209
227
Returns an AnnData object with cell line annotation.
@@ -316,7 +334,8 @@ def annotate_bulk_rna(
316
334
317
335
Args:
318
336
adata: The data object to annotate.
319
- query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
337
+ query_id: The column of `.obs` with cell line information.
338
+ Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
320
339
cell_line_source: The bulk rna expression data from either broad or sanger cell line.
321
340
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
322
341
copy: Determines whether a copy of the `adata` is returned.
@@ -491,7 +510,7 @@ def annotate_from_gdsc(
491
510
) -> AnnData :
492
511
"""Fetch drug response data from GDSC.
493
512
494
- For each cell, we fetch drug response data as natural log of the fitted IC50 for its
513
+ For each cell, we fetch drug response data as natural log of the fitted IC50 and AUC for its
495
514
corresponding cell line and perturbation from GDSC fitted data results file.
496
515
497
516
Args:
@@ -554,9 +573,82 @@ def annotate_from_gdsc(
554
573
adata .obs = (
555
574
adata .obs .reset_index ()
556
575
.set_index ([query_id , query_perturbation ])
557
- .assign (ln_ic50 = gdsc_data .set_index ([reference_id , reference_perturbation ]).ln_ic50 )
576
+ .assign (ln_ic50_gdsc = gdsc_data .set_index ([reference_id , reference_perturbation ]).ln_ic50 )
577
+ .assign (auc_gdsc = gdsc_data .set_index ([reference_id , reference_perturbation ]).auc )
578
+ .reset_index ()
579
+ .set_index (old_index_name )
580
+ )
581
+
582
+ return adata
583
+
584
def annotate_from_prism(
    self,
    adata: AnnData,
    query_id: str = "DepMap_ID",
    query_perturbation: str = "perturbation",
    verbosity: int | str = 5,
    copy: bool = False,
) -> AnnData:
    """Fetch drug response data from PRISM.

    For each cell, we fetch drug response data as IC50, EC50 and AUC for its
    corresponding cell line and perturbation from PRISM fitted data results file.
    Note that all rows where either `depmap_id` or `name` is missing will be dropped.

    Perturbation names are matched case-insensitively against the PRISM drug names.
    The results are stored in the `.obs` columns `ic50_prism`, `ec50_prism` and `auc_prism`;
    cells without a matching (cell line, drug) pair receive missing values.

    Args:
        adata: The data object to annotate.
        query_id: The column of `.obs` with cell line information.
        query_perturbation: The column of `.obs` with perturbation information.
        verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
        copy: Determines whether a copy of the `adata` is returned.

    Returns:
        Returns an AnnData object with drug response annotation.

    Raises:
        ValueError: If `query_id` is not a column of `adata.obs`.

    Examples:
        >>> import pertpy as pt
        >>> adata = pt.dt.mcfarland_2020()
        >>> pt_metadata = pt.md.CellLine()
        >>> pt_metadata.annotate_from_prism(adata, query_id="DepMap_ID")
    """
    if copy:
        adata = adata.copy()
    if query_id not in adata.obs.columns:
        raise ValueError(
            f"The specified `query_id` {query_id} can't be found in the `adata.obs`. \n"
            "Ensure that you are using one of the available query IDs present in 'adata.obs' for the annotation.\n"
            "If the desired query ID is not available, you can fetch the cell line metadata "
            "using the `annotate()` function before calling `annotate_from_prism()`. "
            "This ensures that the required query ID is included in your data."
        )
    if self.drug_response_prism is None:
        self._download_prism()
    prism_data = self.drug_response_prism

    # PRISM starts most drug names with a lowercase letter, so we match case-insensitively.
    # The lookup table is built on a derived frame so the cached PRISM table is not modified,
    # and it is re-aggregated because lower-casing may merge names that differed only by case
    # (the aligned assignment below needs a unique index).
    prism_lookup = (
        prism_data.assign(name_lower=prism_data["name"].str.lower())
        .groupby(["depmap_id", "name_lower"])[["ic50", "ec50", "auc"]]
        .mean()
    )

    identifier_num_all = len(adata.obs[query_id].unique())
    not_matched_identifiers = list(set(adata.obs[query_id]) - set(prism_data["depmap_id"]))
    self._warn_unmatch(
        total_identifiers=identifier_num_all,
        unmatched_identifiers=not_matched_identifiers,
        query_id=query_id,
        reference_id="depmap_id",
        metadata_type="drug response",
        verbosity=verbosity,
    )

    old_index_name = "index" if adata.obs.index.name is None else adata.obs.index.name
    adata.obs = (
        # Temporary lower-cased key; added on a derived frame and dropped again below
        adata.obs.assign(perturbation_lower=adata.obs[query_perturbation].str.lower())
        .reset_index()
        .set_index([query_id, "perturbation_lower"])
        # Series are aligned on the (cell line, lower-cased drug name) index values
        .assign(
            ic50_prism=prism_lookup["ic50"],
            ec50_prism=prism_lookup["ec50"],
            auc_prism=prism_lookup["auc"],
        )
        .reset_index()
        .set_index(old_index_name)
        .drop(columns="perturbation_lower")
    )

    return adata
@@ -577,7 +669,7 @@ def lookup(self) -> LookUp:
577
669
>>> pt_metadata = pt.md.CellLine()
578
670
>>> lookup = pt_metadata.lookup()
579
671
"""
580
- # Fetch the metadata if it hasn't beed downloaded yet
672
+ # Fetch the metadata if it hasn't been downloaded yet
581
673
if self .depmap is None :
582
674
self ._download_cell_line (cell_line_source = "DepMap" )
583
675
if self .cancerxgene is None :
@@ -594,6 +686,8 @@ def lookup(self) -> LookUp:
594
686
self ._download_gdsc (gdsc_dataset = 1 )
595
687
if self .drug_response_gdsc2 is None :
596
688
self ._download_gdsc (gdsc_dataset = 2 )
689
+ if self .drug_response_prism is None :
690
+ self ._download_prism ()
597
691
598
692
# Transfer the data
599
693
return LookUp (
@@ -607,6 +701,7 @@ def lookup(self) -> LookUp:
607
701
self .proteomics ,
608
702
self .drug_response_gdsc1 ,
609
703
self .drug_response_gdsc2 ,
704
+ self .drug_response_prism ,
610
705
],
611
706
)
612
707
0 commit comments