Skip to content

Commit c23d273

Browse files
authored
Merge pull request #217 from datamol-io/feat/isomers
Added function to get the number of stereoisomers
2 parents 9e94d02 + e812492 commit c23d273

File tree

4 files changed

+76
-1
lines changed

4 files changed

+76
-1
lines changed

datamol/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
"save_df": "datamol.io",
136136
# isomers
137137
"enumerate_stereoisomers": "datamol.isomers",
138+
"count_stereoisomers": "datamol.isomers",
138139
"enumerate_tautomers": "datamol.isomers",
139140
"enumerate_structisomers": "datamol.isomers",
140141
"canonical_tautomer": "datamol.isomers",
@@ -329,6 +330,7 @@ def __dir__():
329330
from .io import open_df
330331

331332
from .isomers import enumerate_stereoisomers
333+
from .isomers import count_stereoisomers
332334
from .isomers import enumerate_tautomers
333335
from .isomers import enumerate_structisomers
334336
from .isomers import canonical_tautomer

datamol/isomers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
from ._enumerate import enumerate_stereoisomers
44
from ._enumerate import enumerate_tautomers
55
from ._enumerate import enumerate_structisomers
6+
from ._enumerate import count_stereoisomers
67
from ._enumerate import remove_stereochemistry
78
from ._enumerate import canonical_tautomer

datamol/isomers/_enumerate.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from rdkit.Chem.MolStandardize import rdMolStandardize
77
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers
8+
from rdkit.Chem.EnumerateStereoisomers import GetStereoisomerCount
89
from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions
910

1011
import datamol as dm
@@ -82,7 +83,8 @@ def enumerate_stereoisomers(
8283

8384
# in case any bonds/centers are missing stereo chem flag it here
8485
Chem.AssignStereochemistry(mol, force=False, flagPossibleStereoCenters=True, cleanIt=clean_it) # type: ignore
85-
Chem.FindPotentialStereoBonds(mol, cleanIt=clean_it) # type: ignore
86+
# lu: do not clean (overwrite bond stereo information) when `undefined_only=True` is set
87+
Chem.FindPotentialStereoBonds(mol, cleanIt=not undefined_only and clean_it)
8688

8789
# set up the options
8890
stereo_opts = StereoEnumerationOptions(
@@ -117,6 +119,68 @@ def enumerate_stereoisomers(
117119
return variants
118120

119121

122+
def count_stereoisomers(
    mol: dm.Mol,
    n_variants: int = 20,
    undefined_only: bool = False,
    rationalise: bool = True,
    timeout_seconds: int = None,
    clean_it: bool = True,
    precise: bool = False,
):
    """Count the possible stereoisomers of a molecule.

    Two strategies are available:

    - `precise=False` (default): ask RDKit's `GetStereoisomerCount` for an
      estimate derived from the flagged stereo centers and bonds. This is an
      upper bound on the real number of stereoisomers and is cheap to compute.
      `n_variants` and `timeout_seconds` are not used in this mode.
    - `precise=True`: actually enumerate the stereoisomers with
      `enumerate_stereoisomers` and return how many were produced. This can be
      computationally intensive.

    Args:
        mol: The molecule whose stereoisomers should be counted.
        n_variants: Forwarded to `enumerate_stereoisomers` when `precise=True`
            (the maximum number of molecules it should return).
            NOTE(review): the precise count therefore presumably cannot exceed
            this value - confirm against `enumerate_stereoisomers`.
        undefined_only: If True, only consider stereocenters and bonds with
            undefined stereochemistry; otherwise consider all of them.
        rationalise: If we should try to build and rationalise the molecule to
            ensure it can exist (passed to RDKit as `tryEmbedding` in the
            estimate mode).
        timeout_seconds: Forwarded to `enumerate_stereoisomers` when
            `precise=True`. None disables the timeout.
        clean_it: A flag for assigning stereochemistry. If True, previous
            stereochemistry markings on the bonds are removed (unless
            `undefined_only=True`, in which case bond stereo is preserved).
        precise: Whether to compute the count by enumerating the stereoisomers
            with `enumerate_stereoisomers` instead of estimating it.

    Returns:
        The number of stereoisomers (estimated or enumerated, see above).
    """
    if precise:
        # Exact mode: delegate to the enumeration and count what comes back.
        isomers = enumerate_stereoisomers(
            mol=mol,
            n_variants=n_variants,
            undefined_only=undefined_only,
            rationalise=rationalise,
            timeout_seconds=timeout_seconds,
            clean_it=clean_it,
        )
        return len(isomers)

    # Estimate mode. The RDKit calls below modify the molecule in place, so
    # work on a copy and leave the caller's object untouched.
    working_mol = dm.copy_mol(mol)

    # Flag any stereo centers that are missing a stereochemistry marking.
    Chem.AssignStereochemistry(  # type: ignore
        working_mol,
        force=False,
        flagPossibleStereoCenters=True,
        cleanIt=clean_it,
    )

    # Do not clean (overwrite) existing bond stereo information when only the
    # undefined stereo elements are requested (`undefined_only=True`).
    overwrite_bond_stereo = not undefined_only and clean_it
    Chem.FindPotentialStereoBonds(working_mol, cleanIt=overwrite_bond_stereo)

    count_options = StereoEnumerationOptions(
        tryEmbedding=rationalise,
        onlyUnassigned=undefined_only,
        unique=True,
    )
    return GetStereoisomerCount(working_mol, options=count_options)
182+
183+
120184
def enumerate_structisomers(
121185
mol: dm.Mol,
122186
n_variants: int = 20,

tests/test_isomers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ def test_enumerate_stereo_timeout():
4343
dm.enumerate_stereoisomers(mol, n_variants=2, timeout_seconds=1)
4444

4545

46+
def test_count_stereoisomers():
    """Check `dm.count_stereoisomers` on an unspecified and a specified double bond."""
    # The double bond carries no stereo marking, so restricting the count to
    # undefined stereo elements must give the same result as counting all.
    unspecified_smiles = "CC=CC"
    count_undefined_only = dm.count_stereoisomers(
        dm.to_mol(unspecified_smiles), undefined_only=True
    )
    count_all = dm.count_stereoisomers(
        dm.to_mol(unspecified_smiles), undefined_only=False
    )
    assert count_undefined_only == count_all

    # The double bond stereo is fully specified in the SMILES, so nothing is
    # left undefined and exactly one isomer is expected.
    specified_mol = dm.to_mol("Br/C=C\\Br")
    assert dm.count_stereoisomers(specified_mol, undefined_only=True) == 1
52+
53+
4654
def test_enumerate_structural():
4755
mol = dm.to_mol("CCCCC") # pentane has only three structural isomers
4856

0 commit comments

Comments
 (0)