Skip to content

Commit 72e60e3

Browse files
committed
downgraded rdkit to 2020.09.5 to avoid bugs
1 parent 1e21947 commit 72e60e3

11 files changed

+65
-40
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,6 @@ __pycache__/
44
/conda/*
55
*.egg-info/*
66
test_hyper.param
7-
7+
conda-pkg/
8+
build/
9+
dist/

environment.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@ channels:
88
dependencies:
99
- python>=3.7
1010
- pytorch
11-
- rdkit>=2020.03.3.0
11+
- rdkit=2020.09.5
12+
## Note: The new code for Chem.MolFragmentToSmiles() will not work on the JTVAE
13+
## in newer versions of rdkit (starting from v2021.03.1; hopefully fixed in 2021.03.4)
14+
## See issue https://github.com/rdkit/rdkit/issues/3998
15+
## and https://github.com/chemprop/chemprop/pull/182
16+
## The work on the patch is to be released in 2021.03.4
1217
- scipy
1318
- rxdock
1419
- tmap

hyper.param

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22

33
############### Parameters for SnD #####################
44
receptor_name = CDK2_5IEV # name the receptor to create dir
5-
receptor_file = /home/ziqiaoxu/SampleDock/targets/CDK2_5IEV/5IEV.mol2
5+
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2
66
# must be mol2 format
7-
ligand_file = /home/ziqiaoxu/SampleDock/targets/CDK2_5IEV/Roniciclib.sd
7+
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd
88
# must be sd format
99
ncycle = 1600 # number of cycles to be run
1010
ndesign = 20 # number of designs to be generated per cycle
11-
ensemble = 1 # top of number of design to generate the average structure
11+
ensemble = 1 # number of top designs to generate the average structure
1212
# (1 being just the top scoring structure)
13-
seed_smi = C1(C(NC2=CC=CC=C2)=NC=N3)=C3C=CC=C1
13+
nseeds = 1 # number of top designs to be used as seeds for distributed generation
14+
# nseeds overrides ensemble
15+
seed_smi = C1=CC=CC=C1
1416
# initial seeding SMILES for the first cycle, default to benzene
1517

1618
############### Parameters for rDock ###################
17-
cavity_protocol = rbcavity -was -d -r # cmd and option for creating pocket
18-
# -W option for rxdock precompiled bin file, -was for locally compiled rdock installation
1919
docking_prm = dock.prm # docking protocol (-p), no solvation term by default
2020
npose = 100 # number of poses generated (-n)
2121
prefix = pose_docked_ # prefix of the output files from rDock

sampledock/SnD/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from .docking import dock, sort_pose, save_pose
2-
from .pocket_prepare import prep_prm
32
from .sampler_util import hyperparam_loader, create_wd, smiles_to_sdfile
43
from .generator import single_generator, distributed_generator
54
from .post_process import mkdf, combine_designs

sampledock/SnD/docking.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
def dock(ligs, dock_dir, prmfile, docking_prm, npose, prefix = 'docked'):
1515
# ligs must be a list of file path
16-
print('[INFO]: Docking in Progress\t', end = '\r')
16+
print('[INFO]: Docking in Progress\t\t', end = '\r')
1717
sys.stdout.flush()
1818
procs = []
1919
for i,lig in enumerate(ligs):
@@ -40,7 +40,7 @@ def sort_pose(dock_dir, sort_by, prefix = None):
4040
if x.endswith('.sd')]
4141

4242
if len(poses_mols) == 0:
43-
raise Exception('No .sd file matching the criteria in %s'%dock_dir)
43+
raise FileNotFoundError('No .sd file matching the criteria in %s'%dock_dir)
4444

4545
best_poses = []
4646
for mol_path in poses_mols:

sampledock/SnD/pocket_prepare.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ def wrt_prm(parameter,filename='pocket_docking.prm'):
5151
def prep_prm(receptor,ligand,recep_name,target_dir):
5252

5353
receptor = os.path.abspath(receptor)
54-
if not os.path.exists(receptor): print(receptor+'\nRECEPTOR FILE NOT EXIST!'); return None
54+
if not os.path.exists(receptor): raise FileNotFoundError(receptor+'\nRECEPTOR FILE NOT EXIST!')
5555

5656
ligand = os.path.abspath(ligand)
57-
if not os.path.exists(ligand): print(ligand+'\nLIGAND FILE NOT EXIST!'); return None
57+
if not os.path.exists(ligand): raise FileNotFoundError(ligand+'\nLIGAND FILE NOT EXIST!')
5858

5959
cav_dir = os.path.abspath(target_dir)+'/cavity'
6060
os.makedirs(cav_dir)
@@ -65,10 +65,16 @@ def prep_prm(receptor,ligand,recep_name,target_dir):
6565

6666
def create_cav(prmfile):
6767
# rbcavity must be installed/loaded to execute the cmdline
68-
cmdline = "rbcavity -was -d -r %s"%prmfile
68+
cmdline = "rbcavity -W -d -r %s"%prmfile
6969
proc = subprocess.Popen(cmdline, shell=True)
70-
proc.wait()
71-
print('Docking pocket grid created for: \n'+prmfile+'\n')
70+
returncode = proc.wait()
71+
if returncode == 2:
72+
cmdline = "rbcavity -was -d -r %s"%prmfile
73+
proc = subprocess.Popen(cmdline, shell=True)
74+
returncode = proc.wait()
75+
if returncode == 0:
76+
print('Docking pocket grid created for: \n'+prmfile+'\n')
77+
else: raise Exception('Cavity creation failed')
7278

7379
if __name__ == "__main__":
7480
import argparse

sampledock/__main__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
rdBase.DisableLog('rdApp.error')
2020
from .jtvae import Vocab, JTNNVAE
2121
# Sample and Dock tools
22-
from .SnD import prep_prm
22+
from .SnD.pocket_prepare import prep_prm, create_cav
2323
from .SnD import dock, sort_pose, save_pose
2424
from .SnD import hyperparam_loader, create_wd, smiles_to_sdfile
2525
from .SnD import single_generator, distributed_generator
@@ -61,10 +61,7 @@
6161
prmfile, cav_dir = prep_prm(p.receptor_file,p.ligand_file,p.receptor_name,wd)
6262

6363
## create pocket
64-
cmdline = p.cavity_protocol+' %s > %s/create_cavity.out'%(prmfile,cav_dir)
65-
proc = subprocess.Popen(cmdline, shell=True)
66-
proc.wait()
67-
print('Docking pocket grid created')
64+
create_cav(prmfile)
6865

6966
## Main loop: VAE on subsequent returned compounds
7067
for j in range(p.ncycle):

sampledock/jtvae/chemutils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ def copy_edit_mol(mol):
6767

6868
def get_clique_mol(mol, atoms):
6969
smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
70+
## Comment by Truman 6/23/2021
71+
## The above code will no longer work on this particular task
72+
## with newer versions of rdkit (starting from v2021.03.1; hopefully fixed in 2021.03.4)
73+
## See issue https://github.com/rdkit/rdkit/issues/3998
74+
## and https://github.com/chemprop/chemprop/pull/182
75+
## The work on the patch is to be released in 2021.03.4
7076
new_mol = Chem.MolFromSmiles(smiles, sanitize=False)
7177
new_mol = copy_edit_mol(new_mol).GetMol()
7278
new_mol = sanitize(new_mol) #We assume this is not None
@@ -118,7 +124,9 @@ def tree_decomp(mol):
118124
cnei = nei_list[atom]
119125
bonds = [c for c in cnei if len(cliques[c]) == 2]
120126
rings = [c for c in cnei if len(cliques[c]) > 4]
121-
if len(bonds) > 2 or (len(bonds) == 2 and len(cnei) > 2): #In general, if len(cnei) >= 3, a singleton should be added, but 1 bond + 2 ring is currently not dealt with.
127+
if len(bonds) > 2 or (len(bonds) == 2 and len(cnei) > 2):
128+
# In general, if len(cnei) >= 3, a singleton should be added,
129+
# but 1 bond + 2 ring is currently not dealt with.
122130
cliques.append([atom])
123131
c2 = len(cliques) - 1
124132
for c1 in cnei:
@@ -134,9 +142,10 @@ def tree_decomp(mol):
134142
c1,c2 = cnei[i],cnei[j]
135143
inter = set(cliques[c1]) & set(cliques[c2])
136144
if edges[(c1,c2)] < len(inter):
137-
edges[(c1,c2)] = len(inter) #cnei[i] < cnei[j] by construction
145+
edges[(c1,c2)] = len(inter)
146+
#cnei[i] < cnei[j] by construction
138147

139-
edges = [u + (MST_MAX_WEIGHT-v,) for u,v in edges.items()] #Changed from .iteritems() to .items() by Truman for python 3.7
148+
edges = [u + (MST_MAX_WEIGHT-v,) for u,v in edges.items()]
140149
if len(edges) == 0:
141150
return cliques, edges
142151

sampledock/jtvae/jtnn_vae.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ def __init__(self, vocab, hidden_size, latent_size, depthT, depthG):
3232
self.A_assm = nn.Linear(latent_size, hidden_size, bias=False)
3333
self.assm_loss = nn.CrossEntropyLoss(reduction='sum')
3434

35-
## nn.Linear: Applies a linear transformation to the incoming data (y = xA^T + b)
36-
## Not sure how the biases and weights are specified this way
3735
self.T_mean = nn.Linear(hidden_size, latent_size)
3836
self.T_var = nn.Linear(hidden_size, latent_size)
3937
self.G_mean = nn.Linear(hidden_size, latent_size)
@@ -98,7 +96,9 @@ def find_ensemble(self,smiles_list):
9896
# This is due to difference in parsing of SMILES (especially rings)
9997
## TODO: Convert sampledock to OOP structure and use the vectors directly
10098
except KeyError as key:
101-
print('[KeyError]',key,'is not part of the vocabulary (the model was not trained with this scaffold)')
99+
print('[KeyError]',key,\
100+
'is not part of the vocabulary \
101+
(the model was not trained with this scaffold)')
102102
continue
103103
tree_mean = self.T_mean(x_tree)
104104
tree_log_var = -torch.abs(self.T_var(x_tree))
@@ -223,8 +223,11 @@ def decode(self, x_tree_vecs, x_mol_vecs, prob_decode):
223223

224224
cur_mol = cur_mol.GetMol()
225225
set_atommap(cur_mol)
226-
cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
227-
return Chem.MolToSmiles(cur_mol) if cur_mol is not None else None
226+
try:
227+
Chem.SanitizeMol(cur_mol)
228+
return Chem.MolToSmiles(cur_mol)
229+
except rdkit.Chem.rdchem.AtomKekulizeException:
230+
return None
228231

229232
def dfs_assemble(self, y_tree_mess, x_mol_vecs, all_nodes, cur_mol, global_amap, fa_amap, cur_node, fa_node, prob_decode, check_aroma):
230233
fa_nid = fa_node.nid if fa_node is not None else -1

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
setup(
1111
name='sampledock',
12-
version='0.5',
12+
version='0.5.1',
1313
description='Molecular design framework the merges generative AI and molecular docking',
1414
author='Ziqiao Xu and Aaron Frank',
1515
author_email='[email protected]',

test_hyper.param

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
1-
## All relative path anchors on the location of working directory, not this file
1+
## All relative paths start from the location of the working directory, not this file
22

33
############### Parameters for SnD #####################
4-
receptor_name = CDK2-5IEV # name the receptor to create dir
5-
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2 # must be mol2 format
6-
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd # must be sd format
7-
ncycle = 2 # number of cycles to be run
8-
ndesign = 10 # number of designs to be generated per cycle
9-
ensemble = 5 # top of number of design to generate the average structure
4+
receptor_name = CDK2_5IEV # name the receptor to create dir
5+
receptor_file = ./targets/CDK2_5IEV/5IEV.mol2
6+
# must be mol2 format
7+
ligand_file = ./targets/CDK2_5IEV/Roniciclib.sd
8+
# must be sd format
9+
ncycle = 5 # number of cycles to be run
10+
ndesign = 20 # number of designs to be generated per cycle
11+
ensemble = 1 # number of top designs to generate the average structure
1012
# (1 being just the top scoring structure)
11-
seed_smi = C1=CC=CC=C1 # initial seeding SMILES for the first cycle, default to benzene
13+
nseeds = 1 # number of top designs to be used as seeds for distributed generation
14+
# nseeds overrides ensemble
15+
seed_smi = C1=CC=CC=C1
16+
# initial seeding SMILES for the first cycle, default to benzene
1217

1318
############### Parameters for rDock ###################
14-
cavity_protocol = rbcavity -was -d -r # cmd and option for creating pocket
1519
docking_prm = dock.prm # docking protocol (-p), no solvation term by default
16-
npose = 10 # number of poses generated (-n)
20+
npose = 20 # number of poses generated (-n)
1721
prefix = pose_docked_ # prefix of the output files from rDock
1822
sort_by = SCORE.INTER # filter for sorting the designs
1923

0 commit comments

Comments
 (0)