@@ -39,6 +39,7 @@ def __init__(self):
3939 self .proteomics = None
4040 self .drug_response_gdsc1 = None
4141 self .drug_response_gdsc2 = None
42+ self .drug_response_prism = None
4243
4344 def _download_cell_line (self , cell_line_source : Literal ["DepMap" , "Cancerrxgene" ] = "DepMap" ) -> None :
4445 if cell_line_source == "DepMap" :
@@ -157,7 +158,7 @@ def _download_proteomics(self) -> None:
157158 def _download_gdsc (self , gdsc_dataset : Literal [1 , 2 ] = 1 ) -> None :
158159 if gdsc_dataset == 1 :
159160 # Download GDSC drug response data
160- # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s)
161+ # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s and AUC )
161162 # URL: https://cog.sanger.ac.uk/cancerrxgene/GDSC_release8.4/GDSC1_fitted_dose_response_24Jul22.xlsx
162163 drug_response_gdsc1_file_path = Path (settings .cachedir ) / "gdsc1_info.csv"
163164 if not Path (drug_response_gdsc1_file_path ).exists ():
@@ -181,6 +182,23 @@ def _download_gdsc(self, gdsc_dataset: Literal[1, 2] = 1) -> None:
181182 )
182183 self .drug_response_gdsc2 = pd .read_csv (drug_response_gdsc2_file_path , index_col = 0 )
183184
185+ def _download_prism (self ) -> None :
186+ # Download PRISM drug response data
187+ # Source: DepMap PRISM Repurposing 19Q4 secondary screen dose response curve parameters
188+ drug_response_prism_file_path = Path (settings .cachedir ) / "prism_info.csv"
189+ if not Path (drug_response_prism_file_path ).exists ():
190+ _download (
191+ url = "https://figshare.com/ndownloader/files/20237739" ,
192+ output_file_name = "prism_info.csv" ,
193+ output_path = settings .cachedir ,
194+ block_size = 4096 ,
195+ is_zip = False ,
196+ )
197+ df = pd .read_csv (drug_response_prism_file_path , index_col = 0 )[["depmap_id" , "name" , "ic50" , "ec50" , "auc" ]]
198+ df = df .dropna (subset = ["depmap_id" , "name" ])
199+ df = df .groupby (["depmap_id" , "name" ]).mean ().reset_index ()
200+ self .drug_response_prism = df
201+
184202 def annotate (
185203 self ,
186204 adata : AnnData ,
@@ -197,13 +215,13 @@ def annotate(
197215
198216 Args:
199217 adata: The data object to annotate.
200- query_id: The column of `.obs` with cell line information.
218+ query_id: The column of `` .obs` ` with cell line information.
201219 reference_id: The type of cell line identifier in the metadata, e.g. ModelID, CellLineName or StrippedCellLineName.
202220 If fetching cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
203221 fetch: The metadata to fetch.
204222 cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene.
205223 verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
206- copy: Determines whether a copy of the ` adata` is returned.
224+ copy: Determines whether a copy of `` adata` ` is returned.
207225
208226 Returns:
209227 Returns an AnnData object with cell line annotation.
@@ -316,7 +334,8 @@ def annotate_bulk_rna(
316334
317335 Args:
318336 adata: The data object to annotate.
319- query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
337+ query_id: The column of `.obs` with cell line information.
338+ Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
320339 cell_line_source: The bulk rna expression data from either broad or sanger cell line.
321340 verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
322341 copy: Determines whether a copy of the `adata` is returned.
@@ -491,7 +510,7 @@ def annotate_from_gdsc(
491510 ) -> AnnData :
492511 """Fetch drug response data from GDSC.
493512
494- For each cell, we fetch drug response data as natural log of the fitted IC50 for its
513+ For each cell, we fetch drug response data as natural log of the fitted IC50 and AUC for its
495514 corresponding cell line and perturbation from GDSC fitted data results file.
496515
497516 Args:
@@ -554,9 +573,82 @@ def annotate_from_gdsc(
554573 adata .obs = (
555574 adata .obs .reset_index ()
556575 .set_index ([query_id , query_perturbation ])
557- .assign (ln_ic50 = gdsc_data .set_index ([reference_id , reference_perturbation ]).ln_ic50 )
576+ .assign (ln_ic50_gdsc = gdsc_data .set_index ([reference_id , reference_perturbation ]).ln_ic50 )
577+ .assign (auc_gdsc = gdsc_data .set_index ([reference_id , reference_perturbation ]).auc )
578+ .reset_index ()
579+ .set_index (old_index_name )
580+ )
581+
582+ return adata
583+
584+ def annotate_from_prism (
585+ self ,
586+ adata : AnnData ,
587+ query_id : str = "DepMap_ID" ,
588+ query_perturbation : str = "perturbation" ,
589+ verbosity : int | str = 5 ,
590+ copy : bool = False ,
591+ ) -> AnnData :
592+ """Fetch drug response data from PRISM.
593+
594+ For each cell, we fetch drug response data as IC50, EC50 and AUC for its
595+ corresponding cell line and perturbation from PRISM fitted data results file.
596+ Note that all rows where either `depmap_id` or `name` is missing will be dropped.
597+
598+ Args:
599+ adata: The data object to annotate.
600+ query_id: The column of `.obs` with cell line information.
601+ query_perturbation: The column of `.obs` with perturbation information.
602+ verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
603+ copy: Determines whether a copy of the `adata` is returned.
604+
605+ Returns:
606+ Returns an AnnData object with drug response annotation.
607+
608+ Examples:
609+ >>> import pertpy as pt
610+ >>> adata = pt.dt.mcfarland_2020()
611+ >>> pt_metadata = pt.md.CellLine()
612+ >>> pt_metadata.annotate_from_prism(adata, query_id="DepMap_ID")
613+ """
614+ if copy :
615+ adata = adata .copy ()
616+ if query_id not in adata .obs .columns :
617+ raise ValueError (
618+ f"The specified `query_id` { query_id } can't be found in the `adata.obs`. \n "
619+ "Ensure that you are using one of the available query IDs present in 'adata.obs' for the annotation.\n "
620+ "If the desired query ID is not available, you can fetch the cell line metadata "
621+ "using the `annotate()` function before calling `annotate_from_prism()`. "
622+ "This ensures that the required query ID is included in your data."
623+ )
624+ if self .drug_response_prism is None :
625+ self ._download_prism ()
626+ prism_data = self .drug_response_prism
627+ # PRISM starts most drug names with a lowercase letter, so we want to make it case-insensitive
628+ prism_data ["name_lower" ] = prism_data ["name" ].str .lower ()
629+ adata .obs ["perturbation_lower" ] = adata .obs [query_perturbation ].str .lower ()
630+
631+ identifier_num_all = len (adata .obs [query_id ].unique ())
632+ not_matched_identifiers = list (set (adata .obs [query_id ]) - set (prism_data ["depmap_id" ]))
633+ self ._warn_unmatch (
634+ total_identifiers = identifier_num_all ,
635+ unmatched_identifiers = not_matched_identifiers ,
636+ query_id = query_id ,
637+ reference_id = "depmap_id" ,
638+ metadata_type = "drug response" ,
639+ verbosity = verbosity ,
640+ )
641+
642+ old_index_name = "index" if adata .obs .index .name is None else adata .obs .index .name
643+ adata .obs = (
644+ adata .obs .reset_index ()
645+ .set_index ([query_id , "perturbation_lower" ])
646+ .assign (ic50_prism = prism_data .set_index (["depmap_id" , "name" ]).ic50 )
647+ .assign (ec50_prism = prism_data .set_index (["depmap_id" , "name" ]).ec50 )
648+ .assign (auc_prism = prism_data .set_index (["depmap_id" , "name" ]).auc )
558649 .reset_index ()
559650 .set_index (old_index_name )
651+ .drop (columns = "perturbation_lower" )
560652 )
561653
562654 return adata
@@ -577,7 +669,7 @@ def lookup(self) -> LookUp:
577669 >>> pt_metadata = pt.md.CellLine()
578670 >>> lookup = pt_metadata.lookup()
579671 """
580- # Fetch the metadata if it hasn't beed downloaded yet
672+ # Fetch the metadata if it hasn't been downloaded yet
581673 if self .depmap is None :
582674 self ._download_cell_line (cell_line_source = "DepMap" )
583675 if self .cancerxgene is None :
@@ -594,6 +686,8 @@ def lookup(self) -> LookUp:
594686 self ._download_gdsc (gdsc_dataset = 1 )
595687 if self .drug_response_gdsc2 is None :
596688 self ._download_gdsc (gdsc_dataset = 2 )
689+ if self .drug_response_prism is None :
690+ self ._download_prism ()
597691
598692 # Transfer the data
599693 return LookUp (
@@ -607,6 +701,7 @@ def lookup(self) -> LookUp:
607701 self .proteomics ,
608702 self .drug_response_gdsc1 ,
609703 self .drug_response_gdsc2 ,
704+ self .drug_response_prism ,
610705 ],
611706 )
612707
0 commit comments