Skip to content

Commit

Permalink
CDRP chemical and phenotypic space notebook - SF4
Browse files Browse the repository at this point in the history
  • Loading branch information
Arkkienkeli committed Jan 8, 2024
1 parent 17662e9 commit ce6a220
Showing 1 changed file with 199 additions and 0 deletions.
199 changes: 199 additions & 0 deletions cdrp/Chemical-and-phenotypic-spaces.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sb\n",
"import umap"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"main = pd.read_csv('data/treatment_level_aux_combined.csv.gz')\n",
"columns = [str(i) for i in range(672)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fingerprints = np.load('data/fingerprints_cdrp.npz')['features']\n",
"cdrp_smiles_scaffolds = pd.read_csv('data/cdrp_smiles_scaffolds.csv')\n",
"Y = pd.read_csv(\"data/CDRP_MOA_MATCHES_official.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#get new UMAP embeddings\n",
"reducer = umap.UMAP()\n",
"embeddings = reducer.fit_transform(fingerprints)\n",
"print(fingerprints.shape, embeddings.shape)\n",
"aux = pd.concat((pd.DataFrame(embeddings, columns=[\"UMAP 1\", \"UMAP 2\"]), cdrp_smiles_scaffolds.reset_index()), axis=1)\n",
"#aux\n",
"aux = pd.merge(aux, Y, left_on = 'Metadata_BROAD_ID', right_on = 'Var1', how = 'left')\n",
"\n",
"#to read aux used in publication uncomment next line\n",
"#aux = pd.read_csv('data/chemical_aux_umap.csv')\n",
"\n",
"#UMAP embeddings that were used for the supplementary figure are already in this repository\n",
"#aux.to_csv('data/chemical_aux_umap.csv', index = False)\n",
"\n",
"sb.scatterplot(data=aux, x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"lightpink\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"moas = []\n",
"for k,r in Y.iterrows():\n",
" for i in r[\"Metadata_moa.x\"].split(\"|\"):\n",
" moas.append(i)\n",
"\n",
"moas = pd.DataFrame({'MoA': moas })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10,10))\n",
"a = \"lipoxygenase inhibitor\"\n",
"g = sb.scatterplot(data=aux[~aux['Metadata_moa.x'].str.contains(a)], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"dodgerblue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"h = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(a)], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"limegreen\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"x_lims = (None, None)\n",
"y_lims = (None, None)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"selected_moas = ['adenosine receptor agonist', 'adrenergic receptor antagonist', 'dopamine receptor agonist', 'egfr inhibitor', \n",
" 'estrogen receptor agonist', 'glucocorticoid receptor agonist', \"tyrosine kinase inhibitor\",\n",
" 'opioid receptor antagonist', \"bacterial dna gyrase inhibitor\", \"hmgcr inhibitor\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10,10))\n",
"h = sb.scatterplot(data=aux[~aux['Metadata_moa.x'].str.contains('|'.join(selected_moas))], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"dodgerblue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"u = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[0])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"mediumorchid\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"v = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[1])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"indigo\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"w = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[2])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"teal\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"x = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[3])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"limegreen\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"y = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[4])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"gold\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"z = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[5])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"blue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"k = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[6])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"salmon\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"l = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[7])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"rosybrown\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"m = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[8])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"hotpink\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"n = sb.scatterplot(data=aux[aux['Metadata_moa.x'].str.contains(selected_moas[9])], x=\"UMAP 1\", y=\"UMAP 2\", s=100, color=\"crimson\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"x_lims = (None, None)\n",
"y_lims = (None, None)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig.savefig(\"chemical_space_moa.png\") \n",
"fig.savefig(\"chemical_space_moa.svg\") "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10,10))\n",
"h = sb.scatterplot(data=main[~main['Metadata_moa.x'].str.contains('|'.join(selected_moas))], x=\"X\", y=\"Y\", s=100, color=\"dodgerblue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"u = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[0])], x=\"X\", y=\"Y\", s=100, color=\"mediumorchid\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"v = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[1])], x=\"X\", y=\"Y\", s=100, color=\"indigo\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"w = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[2])], x=\"X\", y=\"Y\", s=100, color=\"teal\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"x = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[3])], x=\"X\", y=\"Y\", s=100, color=\"limegreen\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"y = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[4])], x=\"X\", y=\"Y\", s=100, color=\"gold\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"z = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[5])], x=\"X\", y=\"Y\", s=100, color=\"blue\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"k = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[6])], x=\"X\", y=\"Y\", s=100, color=\"salmon\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"l = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[7])], x=\"X\", y=\"Y\", s=100, color=\"rosybrown\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"m = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[8])], x=\"X\", y=\"Y\", s=100, color=\"hotpink\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"n = sb.scatterplot(data=main[main['Metadata_moa.x'].str.contains(selected_moas[9])], x=\"X\", y=\"Y\", s=100, color=\"crimson\", linewidth=0.5, edgecolor=\"black\", alpha=0.8)\n",
"\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"x_lims = (None, None)\n",
"y_lims = (None, None)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig.savefig(\"phenotypic_space_moa.png\") \n",
"fig.savefig(\"phenotypic_space_moa.svg\") "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit ce6a220

Please sign in to comment.