-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathutils.py
93 lines (67 loc) · 3.01 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import logging
from typing import Optional, Tuple

import pandas as pd
from chembl_webresource_client.new_client import new_client as ch
# Base URL of the ChEMBL site at EBI; the render helpers in this module
# append the report-card and image paths to it.
EBI_URL = "https://www.ebi.ac.uk/chembl/"
def name_to_molecule(name: str) -> Tuple[str, str]:
    """Resolve a molecule synonym to its molfile and ChEMBL ID.

    Filters the ChEMBL molecule resource on the synonym field with an
    ``iexact`` lookup against ``name`` and uses the first result returned.

    Args:
        name: Synonym to look up.

    Returns:
        A ``(molfile, molecule_chembl_id)`` pair taken from the first match.
    """
    wanted_fields = ['molecule_chembl_id', 'molecule_structures']
    query = ch.molecule.filter(molecule_synonyms__molecule_synonym__iexact=name)
    first_hit = query.only(wanted_fields)[0]
    molfile = first_hit["molecule_structures"]["molfile"]
    return molfile, first_hit["molecule_chembl_id"]
def id_to_molecule(chembl_id: str) -> str:
    """Fetch the molfile for a ChEMBL ID.

    Filters the ChEMBL molecule resource on ``chembl_id``, requests only the
    ``molecule_structures`` field and uses the first result returned.

    Args:
        chembl_id: ChEMBL identifier of the molecule.

    Returns:
        The ``molfile`` entry of the first match. Only this single value is
        returned (not a pair), hence the ``str`` annotation.
    """
    matches = ch.molecule.filter(chembl_id=chembl_id).only('molecule_structures')
    return matches[0]["molecule_structures"]["molfile"]
def style_table(df: pd.DataFrame) -> pd.io.formats.style.Styler:
return df.style.hide_index().format(
subset=['Similarity'],
decimal=',', precision=2
).bar(
subset=['Similarity'],
align="mid",
cmap="coolwarm"
).applymap(lambda x: 'background-color: #aaaaaa', subset=['Image'])
def style_predictions(df: pd.DataFrame) -> pd.io.formats.style.Styler:
return df.style.hide_index().format(
subset=['Prediction'],
decimal=',', precision=2
).bar(
subset=['Prediction'],
align="mid",
cmap="plasma_r",
vmax=1.0,
vmin=0.8
)
def render_chembl_url(chembl_id: str) -> str:
    """Build an HTML anchor to the compound report card of ``chembl_id``.

    The ID is used both in the link target (under ``EBI_URL``) and as the
    visible link text.
    """
    report_card = f"{EBI_URL}compound_report_card/{chembl_id}/"
    return f'<a href="{report_card}">{chembl_id}</a>'
def render_chembl_img(chembl_id: str) -> str:
    """Build an HTML ``<img>`` tag for the SVG image of ``chembl_id``.

    The image source lives under ``EBI_URL`` and the tag is fixed at
    100px by 100px.
    """
    svg_location = f"{EBI_URL}api/data/image/{chembl_id}.svg"
    return f'<img src="{svg_location}" height="100px" width="100px">'
def render_row(row):
    """Convert one similarity result into a display record.

    Reads ``similarity``, ``pref_name`` and ``molecule_chembl_id`` from
    ``row`` and returns a dict with the keys ``Similarity`` (as float),
    ``Preferred name``, ``ChEMBL ID`` (HTML link) and ``Image`` (HTML image
    tag), in that order.
    """
    display_row = {}
    display_row["Similarity"] = float(row["similarity"])
    display_row["Preferred name"] = row["pref_name"]
    display_row["ChEMBL ID"] = render_chembl_url(row["molecule_chembl_id"])
    display_row["Image"] = render_chembl_img(row["molecule_chembl_id"])
    return display_row
def render_target(target):
    """Convert one target prediction into a display record.

    Reads ``pred`` and ``chembl_id`` from ``target`` and returns a dict with
    the keys ``Prediction`` (as float) and ``ChEMBL ID`` (HTML link).
    """
    display_row = {}
    display_row["Prediction"] = float(target["pred"])
    display_row["ChEMBL ID"] = render_chembl_url(target["chembl_id"])
    return display_row
def find_similar_molecules(smiles: str, threshold: int):
    """Query the ChEMBL similarity resource for molecules similar to ``smiles``.

    Args:
        smiles: SMILES string of the query structure.
        threshold: Value passed to the similarity resource as ``similarity``.

    Returns:
        The query result restricted to the ``molecule_chembl_id``,
        ``similarity``, ``pref_name`` and ``molecule_structures`` fields, or
        ``None`` if creating the query raised an exception.
    """
    columns = ['molecule_chembl_id', 'similarity', 'pref_name', 'molecule_structures']
    try:
        return ch.similarity.filter(smiles=smiles, similarity=threshold).only(columns)
    except Exception:
        # Best-effort lookup: callers still get None on failure, but the
        # cause is logged instead of being dropped silently.
        # NOTE(review): if the client evaluates queries lazily, request errors
        # may only surface when the result is iterated, not here - confirm.
        logging.getLogger(__name__).exception(
            "ChEMBL similarity search failed for %r (threshold=%s)",
            smiles,
            threshold,
        )
        return None
def render_similarity_table(similar_molecules) -> Optional[str]:
    """Render similar molecules as a styled HTML table.

    Entries whose ``molecule_structures`` value is falsy are skipped.

    Args:
        similar_molecules: Iterable of molecule records, or ``None`` (which
            ``find_similar_molecules`` returns on failure).

    Returns:
        The HTML of the styled table, or ``None`` when ``similar_molecules``
        is ``None`` or no entry has structure data.
    """
    if similar_molecules is None:
        return None
    records = [render_row(row) for row in similar_molecules if row["molecule_structures"]]
    if not records:
        # A frame built from no records has no 'Similarity'/'Image' columns
        # for style_table to select, so there is nothing to render.
        return None
    df = pd.DataFrame.from_records(records)
    styled = style_table(df)
    return styled.to_html(render_links=True)
def render_target_predictions_table(predictions) -> Optional[str]:
    """Render the 20 highest target predictions as a styled HTML table.

    Args:
        predictions: Data accepted by ``pd.DataFrame`` that provides the
            ``pred`` and ``chembl_id`` fields for each prediction.

    Returns:
        The HTML of the styled table, or ``None`` when there are no
        predictions.
    """
    df = pd.DataFrame(predictions)
    if df.empty:
        # No rows means no 'pred' column to sort by; report "nothing to show"
        # instead of raising.
        return None
    top_predictions = df.sort_values(by=['pred'], ascending=False).head(20)
    records = [render_target(target) for target in top_predictions.to_dict('records')]
    styled = style_predictions(pd.DataFrame.from_records(records))
    return styled.to_html(render_links=True)
def get_similar_smiles(similar_molecules):
    """Collect the canonical SMILES of every molecule that has structure data.

    Entries whose ``molecule_structures`` value is falsy are skipped; the
    order of the remaining entries is preserved.
    """
    smiles_list = []
    for molecule in similar_molecules:
        if not molecule["molecule_structures"]:
            continue
        smiles_list.append(molecule["molecule_structures"]["canonical_smiles"])
    return smiles_list