|
5 | 5 |
|
6 | 6 | from rdkit.Chem.MolStandardize import rdMolStandardize
|
7 | 7 | from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers
|
| 8 | +from rdkit.Chem.EnumerateStereoisomers import GetStereoisomerCount |
8 | 9 | from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions
|
9 | 10 |
|
10 | 11 | import datamol as dm
|
@@ -82,7 +83,8 @@ def enumerate_stereoisomers(
|
82 | 83 |
|
83 | 84 | # in case any bonds/centers are missing stereo chem flag it here
|
84 | 85 | Chem.AssignStereochemistry(mol, force=False, flagPossibleStereoCenters=True, cleanIt=clean_it) # type: ignore
|
85 |
| - Chem.FindPotentialStereoBonds(mol, cleanIt=clean_it) # type: ignore |
| 86 | + # lu: do not clean (overwrite bond stereo information) when `undefined_only=True` is set |
| 87 | + Chem.FindPotentialStereoBonds(mol, cleanIt=not undefined_only and clean_it) |
86 | 88 |
|
87 | 89 | # set up the options
|
88 | 90 | stereo_opts = StereoEnumerationOptions(
|
@@ -117,6 +119,68 @@ def enumerate_stereoisomers(
|
117 | 119 | return variants
|
118 | 120 |
|
119 | 121 |
|
| 122 | +def count_stereoisomers( |
| 123 | + mol: dm.Mol, |
| 124 | + n_variants: int = 20, |
| 125 | + undefined_only: bool = False, |
| 126 | + rationalise: bool = True, |
| 127 | + timeout_seconds: int = None, |
| 128 | + clean_it: bool = True, |
| 129 | + precise: bool = False, |
| 130 | +): |
| 131 | + """Get the number of possible stereoisomers for a molecule. |
| 132 | +
|
| 133 | + Warning: By default, this function computes an estimated number based on the stereo bonds, which |
| 134 | + gives an upper bound of the possible stereoisomers. By setting `precise=True`, the number is computed |
| 135 | + by enumerating the stereoisomers. However, this can be computationally intensive. |
| 136 | +
|
| 137 | + Args: |
| 138 | + mol: The molecule whose state we should enumerate. |
| 139 | + n_variants: The maximum number of stereoisomers to enumerate. Only used when `precise=True`. |
| 140 | + undefined_only: If we should enumerate all stereocenters and bonds or only those |
| 141 | + with undefined stereochemistry. |
| 142 | + rationalise: If we should try to build and rationalise the molecule to ensure it |
| 143 | + can exist. |
| 144 | + timeout_seconds: The maximum amount of time to spend on enumeration. None |
| 145 | + will disable the timeout. Note that the timeout might be inaccurate as a running single variant |
| 146 | + computation is not stopped when the duration is reached. |
| 147 | + clean_it: A flag for assigning stereochemistry. If True, it will remove previous stereochemistry |
| 148 | + markings on the bonds. |
| 149 | + precise: Whether to compute the count by enumerating the stereoisomers with `enumerate_stereoisomers`. |
| 150 | +
|
| 151 | + """ |
| 152 | + if precise: |
| 153 | + num_variants = len( |
| 154 | + enumerate_stereoisomers( |
| 155 | + mol=mol, |
| 156 | + n_variants=n_variants, |
| 157 | + undefined_only=undefined_only, |
| 158 | + rationalise=rationalise, |
| 159 | + timeout_seconds=timeout_seconds, |
| 160 | + clean_it=clean_it, |
| 161 | + ) |
| 162 | + ) |
| 163 | + else: |
| 164 | + # safety first |
| 165 | + mol = dm.copy_mol(mol) |
| 166 | + |
| 167 | + # in case any bonds/centers are missing stereo chem flag it here |
| 168 | + Chem.AssignStereochemistry(mol, force=False, flagPossibleStereoCenters=True, cleanIt=clean_it) # type: ignore |
| 169 | + # lu: do not clean (overwrite bond stereo information) when `undefined_only=True` is set |
| 170 | + Chem.FindPotentialStereoBonds(mol, cleanIt=not undefined_only and clean_it) |
| 171 | + |
| 172 | + # set up the options |
| 173 | + stereo_opts = StereoEnumerationOptions( |
| 174 | + tryEmbedding=rationalise, |
| 175 | + onlyUnassigned=undefined_only, |
| 176 | + unique=True, |
| 177 | + ) |
| 178 | + |
| 179 | + num_variants = GetStereoisomerCount(mol, options=stereo_opts) |
| 180 | + |
| 181 | + return num_variants |
| 182 | + |
| 183 | + |
120 | 184 | def enumerate_structisomers(
|
121 | 185 | mol: dm.Mol,
|
122 | 186 | n_variants: int = 20,
|
|
0 commit comments