diff --git a/pertpy/metadata/_cell_line.py b/pertpy/metadata/_cell_line.py
index ab0e39eb..742eeac1 100644
--- a/pertpy/metadata/_cell_line.py
+++ b/pertpy/metadata/_cell_line.py
@@ -30,12 +30,12 @@ def __init__(self):
 
         # Download cell line metadata from DepMap
         # Source: https://depmap.org/portal/download/all/ (DepMap Public 22Q2)
-        depmap_cell_line_path = Path(settings.cachedir) / "sample_info.csv"
+        depmap_cell_line_path = Path(settings.cachedir) / "depmap_info.csv"
         if not Path(depmap_cell_line_path).exists():
             print("[bold yellow]No DepMap metadata file found. Starting download now.")
             _download(
                 url="https://ndownloader.figshare.com/files/35020903",
-                output_file_name="sample_info.csv",
+                output_file_name="depmap_info.csv",
                 output_path=settings.cachedir,
                 block_size=4096,
                 is_zip=False,
@@ -44,11 +44,11 @@ def __init__(self):
 
         # Download cell line metadata from The Genomics of Drug Sensitivity in Cancer Project
         # Source: https://www.cancerrxgene.org/celllines
-        cell_line_cancer_project_file_path = Path(settings.cachedir) / "cell_line_cancer_project.csv"
-        cell_line_cancer_project_transformed_path = Path(settings.cachedir) / "cell_line_cancer_project_transformed.csv"
+        cancerxgene_cell_line_path = Path(settings.cachedir) / "cell_line_cancer_project.csv"
+        transformed_cancerxgene_cell_line_path = Path(settings.cachedir) / "cancerrxgene_info.csv"
 
-        if not Path(cell_line_cancer_project_transformed_path).exists():
-            if not Path(cell_line_cancer_project_file_path).exists():
+        if not Path(transformed_cancerxgene_cell_line_path).exists():
+            if not Path(cancerxgene_cell_line_path).exists():
                 print(
                     "[bold yellow]No cell line metadata file from The Genomics of Drug Sensitivity "
                     "in Cancer Project found. Starting download now."
@@ -68,123 +68,127 @@ def __init__(self):
                     is_zip=False,
                 )
 
-            self.cl_cancer_project_meta = pd.read_csv(cell_line_cancer_project_file_path)
-            self.cl_cancer_project_meta.columns = self.cl_cancer_project_meta.columns.str.strip()
-            self.cl_cancer_project_meta["stripped_cell_line_name"] = (
-                self.cl_cancer_project_meta["Cell line Name"]
-                .str.replace(r"\-|\.", "", regex=True)
-                .str.upper()
-                .astype("category")
+            self.cancerxgene = pd.read_csv(cancerxgene_cell_line_path)
+            self.cancerxgene.columns = self.cancerxgene.columns.str.strip()
+            self.cancerxgene["stripped_cell_line_name"] = (
+                self.cancerxgene["Cell line Name"].str.replace(r"\-|\.", "", regex=True).str.upper().astype("category")
             )
             # pivot the data frame so that each cell line has only one row of metadata
-            index_col = set(self.cl_cancer_project_meta.columns) - {
+            index_col = set(self.cancerxgene.columns) - {
                 "Datasets",
                 "number of drugs",
             }
-            self.cl_cancer_project_meta = self.cl_cancer_project_meta.pivot(
-                index=index_col, columns="Datasets", values="number of drugs"
-            )
-            self.cl_cancer_project_meta.columns.name = None
-            self.cl_cancer_project_meta = self.cl_cancer_project_meta.reset_index().rename(
-                columns={"Cell line Name": "cell_line_name"}
-            )
-            self.cl_cancer_project_meta.to_csv(cell_line_cancer_project_transformed_path)
+            self.cancerxgene = self.cancerxgene.pivot(index=index_col, columns="Datasets", values="number of drugs")
+            self.cancerxgene.columns.name = None
+            self.cancerxgene = self.cancerxgene.reset_index().rename(columns={"Cell line Name": "cell_line_name"})
+            self.cancerxgene.to_csv(transformed_cancerxgene_cell_line_path)
         else:
-            self.cl_cancer_project_meta = pd.read_csv(cell_line_cancer_project_transformed_path, index_col=0)
+            self.cancerxgene = pd.read_csv(transformed_cancerxgene_cell_line_path, index_col=0)
 
         # Download metadata for driver genes from DepMap.Sanger
         # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Gene annotation)
-        gene_annotation_file_path = Path(settings.cachedir) / "gene_identifiers_20191101.csv"
+        gene_annotation_file_path = Path(settings.cachedir) / "genes_info.csv"
         if not Path(gene_annotation_file_path).exists():
             print("[bold yellow]No metadata file was found for gene annotation. Starting download now.")
             _download(
                 url="https://cog.sanger.ac.uk/cmp/download/gene_identifiers_20191101.csv",
-                output_file_name="gene_identifiers_20191101.csv",
+                output_file_name="genes_info.csv",
                 output_path=settings.cachedir,
                 block_size=4096,
                 is_zip=False,
             )
         self.gene_annotation = pd.read_table(gene_annotation_file_path, delimiter=",")
 
-        # Download bulk RNA-seq data collated by the Wellcome Sanger Institute and the Broad Institute from DepMap.Sanger
-        # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Expression data)
-        # issue: read count values contain random whitespace
-        # solution: remove the white space and convert to int before depmap updates the metadata
-        bulk_rna_sanger_file_path = Path(settings.cachedir) / "rnaseq_read_count_20220624_processed.csv"
-        if not Path(bulk_rna_sanger_file_path).exists():
-            print(
-                "[bold yellow]No metadata file was found for bulk RNA-seq data of Sanger cell line."
-                " Starting download now."
-            )
-            _download(
-                url="https://figshare.com/ndownloader/files/42467103",
-                output_file_name="rnaseq_read_count_20220624_processed.csv",
-                output_path=settings.cachedir,
-                block_size=4096,
-                is_zip=False,
-            )
-        self.bulk_rna_sanger = pd.read_csv(bulk_rna_sanger_file_path, index_col=0, dtype="unicode")
-
-        # Download CCLE expression data from DepMap
-        # Source: https://depmap.org/portal/download/all/ (DepMap Public 22Q2)
-        bulk_rna_broad_file_path = Path(settings.cachedir) / "CCLE_expression_full.csv"
-        if not Path(bulk_rna_broad_file_path).exists():
-            print("[bold yellow]No metadata file was found for CCLE expression data. Starting download now.")
-            _download(
-                url="https://figshare.com/ndownloader/files/34989922",
-                output_file_name="CCLE_expression_full.csv",
-                output_path=settings.cachedir,
-                block_size=4096,
-                is_zip=False,
-            )
-        self.bulk_rna_broad = pd.read_csv(bulk_rna_broad_file_path, index_col=0)
+        self.bulk_rna_sanger = None
+        self.bulk_rna_broad = None
+        self.proteomics = None
+        self.drug_response_gdsc1 = None
+        self.drug_response_gdsc2 = None
+
+    def _download_bulk_rna(self, cell_line_source: Literal["broad", "sanger", "both"] = "both") -> None:
+        if cell_line_source != "broad":
+            # Download bulk RNA-seq data collated by the Wellcome Sanger Institute and the Broad Institute from DepMap.Sanger
+            # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Expression data)
+            # issue: read count values contain random whitespace
+            # solution: remove the white space and convert to int before depmap updates the metadata
+            bulk_rna_sanger_file_path = Path(settings.cachedir) / "rnaseq_sanger_info.csv"
+            if not Path(bulk_rna_sanger_file_path).exists():
+                print(
+                    "[bold yellow]No metadata file was found for bulk RNA-seq data of Sanger cell line."
+                    " Starting download now."
+                )
+                _download(
+                    url="https://figshare.com/ndownloader/files/42467103",
+                    output_file_name="rnaseq_sanger_info.csv",
+                    output_path=settings.cachedir,
+                    block_size=4096,
+                    is_zip=False,
+                )
+            self.bulk_rna_sanger = pd.read_csv(bulk_rna_sanger_file_path, index_col=0, dtype="unicode")
+        if cell_line_source != "sanger":
+            # Download CCLE expression data from DepMap
+            # Source: https://depmap.org/portal/download/all/ (DepMap Public 22Q2)
+            bulk_rna_broad_file_path = Path(settings.cachedir) / "rnaseq_depmap_info.csv"
+            if not Path(bulk_rna_broad_file_path).exists():
+                print("[bold yellow]No metadata file was found for CCLE expression data. Starting download now.")
+                _download(
+                    url="https://figshare.com/ndownloader/files/34989922",
+                    output_file_name="rnaseq_depmap_info.csv",
+                    output_path=settings.cachedir,
+                    block_size=4096,
+                    is_zip=False,
+                )
+            self.bulk_rna_broad = pd.read_csv(bulk_rna_broad_file_path, index_col=0)
 
+    def _download_proteomics(self) -> None:
         # Download proteomics data processed by DepMap.Sanger
         # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Proteomics)
-        proteomics_file_path = Path(settings.cachedir) / "proteomics_all_20221214_processed.csv"
+        proteomics_file_path = Path(settings.cachedir) / "proteomics_info.csv"
         if not Path(proteomics_file_path).exists():
             print("[bold yellow]No metadata file was found for proteomics data (DepMap.Sanger). Starting download now.")
             _download(
                 url="https://figshare.com/ndownloader/files/42468393",
-                output_file_name="proteomics_all_20221214_processed.csv",
+                output_file_name="proteomics_info.csv",
                 output_path=settings.cachedir,
                 block_size=4096,
                 is_zip=False,
             )
         self.proteomics = pd.read_csv(proteomics_file_path, index_col=0)
 
-        # Download GDSC drug response data
-        # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s)
-        # URL: https://cog.sanger.ac.uk/cancerrxgene/GDSC_release8.4/GDSC1_fitted_dose_response_24Jul22.xlsx
-        drug_response_gdsc1_file_path = Path(settings.cachedir) / "ic50_gdsc1.csv"
-        if not Path(drug_response_gdsc1_file_path).exists():
-            print(
-                "[bold yellow]No metadata file was found for drug response data of GDSC1 dataset."
-                " Starting download now."
-            )
-            _download(
-                url="https://figshare.com/ndownloader/files/43757235",
-                output_file_name="ic50_gdsc1.csv",
-                output_path=settings.cachedir,
-                block_size=4096,
-                is_zip=False,
-            )
-        self.drug_response_gdsc1 = pd.read_csv(drug_response_gdsc1_file_path, index_col=0)
-
-        drug_response_gdsc2_file_path = Path(settings.cachedir) / "ic50_gdsc2.csv"
-        if not Path(drug_response_gdsc2_file_path).exists():
-            print(
-                "[bold yellow]No metadata file was found for drug response data of GDSC2 dataset."
-                " Starting download now."
-            )
-            _download(
-                url="https://figshare.com/ndownloader/files/43757232",
-                output_file_name="ic50_gdsc2.csv",
-                output_path=settings.cachedir,
-                block_size=4096,
-                is_zip=False,
-            )
-        self.drug_response_gdsc2 = pd.read_csv(drug_response_gdsc2_file_path, index_col=0)
+    def _download_gdsc(self, gdsc_dataset: Literal[1, 2] = 1) -> None:
+        if gdsc_dataset == 1:
+            # Download GDSC drug response data
+            # Source: https://www.cancerrxgene.org/downloads/bulk_download (Drug Screening - IC50s)
+            # URL: https://cog.sanger.ac.uk/cancerrxgene/GDSC_release8.4/GDSC1_fitted_dose_response_24Jul22.xlsx
+            drug_response_gdsc1_file_path = Path(settings.cachedir) / "gdsc1_info.csv"
+            if not Path(drug_response_gdsc1_file_path).exists():
+                print(
+                    "[bold yellow]No metadata file was found for drug response data of GDSC1 dataset."
+                    " Starting download now."
+                )
+                _download(
+                    url="https://figshare.com/ndownloader/files/43757235",
+                    output_file_name="gdsc1_info.csv",
+                    output_path=settings.cachedir,
+                    block_size=4096,
+                    is_zip=False,
+                )
+            self.drug_response_gdsc1 = pd.read_csv(drug_response_gdsc1_file_path, index_col=0)
+        if gdsc_dataset == 2:
+            drug_response_gdsc2_file_path = Path(settings.cachedir) / "gdsc2_info.csv"
+            if not Path(drug_response_gdsc2_file_path).exists():
+                print(
+                    "[bold yellow]No metadata file was found for drug response data of GDSC2 dataset."
+                    " Starting download now."
+                )
+                _download(
+                    url="https://figshare.com/ndownloader/files/43757232",
+                    output_file_name="gdsc2_info.csv",
+                    output_path=settings.cachedir,
+                    block_size=4096,
+                    is_zip=False,
+                )
+            self.drug_response_gdsc2 = pd.read_csv(drug_response_gdsc2_file_path, index_col=0)
 
     def annotate(
         self,
@@ -196,9 +200,9 @@ def annotate(
         verbosity: int | str = 5,
         copy: bool = False,
     ) -> AnnData:
-        """Annotate cell lines by DepMap information.
+        """Annotate cell lines.
 
-        For each cell, we fetch cell line annotation from Dependency Map (DepMap).
+        For each cell, we fetch cell line annotation from either the Dependency Map (DepMap) or The Genomics of Drug Sensitivity in Cancer Project (Cancerxgene).
 
         Args:
             adata: The data object to annotate.
@@ -241,7 +245,7 @@ def annotate(
                     "Ensure that stripped cell line names are available in 'adata.obs.' ",
                     "or use the DepMap as `cell_line_source` to annotate the cell line first ",
                 )
-            cell_line_meta = self.cl_cancer_project_meta
+            cell_line_meta = self.cancerxgene
 
         if query_id not in adata.obs.columns:
             raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.")
@@ -354,7 +358,11 @@ def annotate_bulk_rna(
             )
 
         identifier_num_all = len(adata.obs[query_id].unique())
+
+        # Lazily download the bulk rna expression data
         if cell_line_source == "sanger":
+            if self.bulk_rna_sanger is None:
+                self._download_bulk_rna(cell_line_source="sanger")
             reference_id = "model_name"
             not_matched_identifiers = list(set(adata.obs[query_id]) - set(self.bulk_rna_sanger.index))
         else:
@@ -365,7 +373,8 @@ def annotate_bulk_rna(
                 "Ensure that `DepMap_ID` is available in 'adata.obs'. ",
                 "Alternatively, use `annotate()` to annotate the cell line first ",
             )
-
+            if self.bulk_rna_broad is None:
+                self._download_bulk_rna(cell_line_source="broad")
             if query_id == "cell_line_name":
                 query_id = "DepMap_ID"
             not_matched_identifiers = list(set(adata.obs[query_id]) - set(self.bulk_rna_broad.index))
@@ -387,7 +396,7 @@ def annotate_bulk_rna(
         else:
             if gene_identifier == "gene_ID":
                 self.bulk_rna_broad.columns = [
-                    gene_name.split(" (")[1].split(")")[0] if "(" in gene_name else gene_name
+                    (gene_name.split(" (")[1].split(")")[0] if "(" in gene_name else gene_name)
                     for gene_name in self.bulk_rna_broad.columns
                 ]
             elif gene_identifier == "gene_name":
@@ -454,7 +463,9 @@ def annotate_protein_expression(
                 "using the `annotate()` function before calling annotate_protein_expression(). \n"
                 "This ensures that the required query ID is included in your data."
             )
-
+        # Lazily download the proteomics data
+        if self.proteomics is None:
+            self._download_proteomics()
         if reference_id not in self.proteomics.columns:
             raise ValueError(
                 f"The specified `reference_id`{reference_id} can't be found in the protein expression data. \n"
@@ -536,9 +547,14 @@ def annotate_from_gdsc(
                 "using the `annotate()` function before calling `annotate_from_gdsc()`. "
                 "This ensures that the required query ID is included in your data."
             )
+        # Lazily download the GDSC data
         if gdsc_dataset == 1:
+            if self.drug_response_gdsc1 is None:
+                self._download_gdsc(gdsc_dataset=1)
             gdsc_data = self.drug_response_gdsc1
         else:
+            if self.drug_response_gdsc2 is None:
+                self._download_gdsc(gdsc_dataset=2)
             gdsc_data = self.drug_response_gdsc2
 
         identifier_num_all = len(adata.obs[query_id].unique())
@@ -579,11 +595,24 @@ def lookup(self) -> LookUp:
             >>> pt_metadata = pt.md.CellLine()
             >>> lookup = pt_metadata.lookup()
         """
+        # Fetch the metadata if it hasn't beed downloaded yet
+        if self.bulk_rna_broad is None:
+            self._download_bulk_rna(cell_line_source="broad")
+        if self.bulk_rna_sanger is None:
+            self._download_bulk_rna(cell_line_source="sanger")
+        if self.proteomics is None:
+            self._download_proteomics()
+        if self.drug_response_gdsc1 is None:
+            self._download_gdsc(gdsc_dataset=1)
+        if self.drug_response_gdsc2 is None:
+            self._download_gdsc(gdsc_dataset=2)
+
+        # Transfer the data
         return LookUp(
             type="cell_line",
             transfer_metadata=[
                 self.depmap,
-                self.cl_cancer_project_meta,
+                self.cancerxgene,
                 self.gene_annotation,
                 self.bulk_rna_sanger,
                 self.bulk_rna_broad,
@@ -689,8 +718,8 @@ def plot_correlation(
 
         Args:
             adata: Input data object.
-            corr: Pearson correlation scores. If not available, please call the function `pt.md.CellLine.correlate()` first.
-            pval: P-values for pearson correlation. If not available, please call the function `pt.md.CellLine.correlate()` first.
+            corr: Pearson correlation scores.
+            pval: P-values for pearson correlation.
             identifier: Column in `.obs` containing the identifiers. Defaults to 'DepMap_ID'.
             metadata_key: Key of the AnnData obsm for comparison with the X matrix. Defaults to 'bulk_rna_broad'.
             category: The category for correlation comparison. Defaults to "cell line".
@@ -701,6 +730,11 @@ def plot_correlation(
         Returns:
             Pearson correlation coefficients and their corresponding p-values for matched and unmatched cell lines separately.
         """
+        if corr is None or pval is None:
+            raise ValueError(
+                "Missing required input parameter: 'corr' or 'pval'. Please call the function `pt.md.CellLine.correlate()` to generate these outputs before proceeding."
+            )
+
         if category == "cell line":
             if subset_identifier is None:
                 annotation = "\n".join(
@@ -716,32 +750,39 @@ def plot_correlation(
                 subset_identifier_list = (
                     [subset_identifier] if isinstance(subset_identifier, str | int) else list(subset_identifier)
                 )
-
-                if all(isinstance(id, int) and 0 <= id < adata.n_obs for id in subset_identifier_list):
-                    # Visualize the chosen cell line at the given index
-                    subset_identifier_list = adata.obs[identifier].values[subset_identifier_list]
-                elif not all(isinstance(id, str) for id in subset_identifier_list) or not set(
-                    subset_identifier_list
-                ).issubset(adata.obs[identifier].unique()):
-                    # The chosen cell line must be found in `identifier`
-                    raise ValueError(
-                        "`Subset_identifier` must contain either all strings or all integers within the index."
-                    )
+                # Convert the valid identifiers to the index list
+                if all(isinstance(id, str) for id in subset_identifier_list):
+                    if set(subset_identifier_list).issubset(adata.obs[identifier].unique()):
+                        subset_identifier_list = np.where(
+                            np.in1d(adata.obs[identifier].values, subset_identifier_list)
+                        )[0]
+                    else:
+                        raise ValueError("`Subset_identifier` must be found in adata.obs.`identifier`.")
+                elif all(isinstance(id, int) and 0 <= id < adata.n_obs for id in subset_identifier_list):
+                    pass
+                elif all(isinstance(id, int) and (id < 0 or id >= adata.n_obs) for id in subset_identifier_list):
+                    raise ValueError("`Subset_identifier` out of index.")
+                else:
+                    raise ValueError("`Subset_identifier` must contain either all strings or all integers.")
 
                 plt.scatter(
-                    x=adata.obsm[metadata_key].loc[subset_identifier_list],
-                    y=adata[adata.obs[identifier].isin(subset_identifier_list)].X,
+                    x=adata.obsm[metadata_key].iloc[subset_identifier_list],
+                    y=adata[subset_identifier_list].X,
                 )
                 plt.xlabel(
-                    f"{metadata_key}: {subset_identifier_list[0]}"
+                    f"{metadata_key}: {adata.obs[identifier].values[subset_identifier_list[0]]}"
                     if len(subset_identifier_list) == 1
                     else f"{metadata_key}"
                 )
-                plt.ylabel(f"Baseline: {subset_identifier_list[0]}" if len(subset_identifier_list) == 1 else "Baseline")
+                plt.ylabel(
+                    f"Baseline: {adata.obs[identifier].values[subset_identifier_list[0]]}"
+                    if len(subset_identifier_list) == 1
+                    else "Baseline"
+                )
 
                 # Annotate with the correlation coefficient and p-value of the chosen cell lines
-                subset_cor = np.mean(np.diag(corr.loc[subset_identifier_list, subset_identifier_list]))
-                subset_pval = np.mean(np.diag(pval.loc[subset_identifier_list, subset_identifier_list]))
+                subset_cor = np.mean(np.diag(corr.iloc[subset_identifier_list, subset_identifier_list]))
+                subset_pval = np.mean(np.diag(pval.iloc[subset_identifier_list, subset_identifier_list]))
                 annotation = "\n".join(
                     (
                         f"Pearson correlation: {subset_cor:.4f}",