Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable the assignment of Forcefiled parameters to generated molecules. #48

Merged
merged 16 commits into from
Dec 13, 2023
Merged
9 changes: 8 additions & 1 deletion .trunk/config/.cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@
"pydotplus",
"Carbocycles",
"xlabel",
"ylabel"
"ylabel",
"ffnonbonded",
"forcefield",
"opls",
"nonbonded",
"ffparam",
"atomnum",
"ffbonded"
]
}
1 change: 1 addition & 0 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ lint:
paths:
- tests/test_molecule.py
- .gitignore
- bigsmiles_gen/data/*
actions:
enabled:
- trunk-announce
Expand Down
2,701 changes: 2,701 additions & 0 deletions bigsmiles_gen/data/ffbonded.itp

Large diffs are not rendered by default.

857 changes: 857 additions & 0 deletions bigsmiles_gen/data/ffnonbonded.itp

Large diffs are not rendered by default.

731 changes: 731 additions & 0 deletions bigsmiles_gen/data/opls.par

Large diffs are not rendered by default.

165 changes: 165 additions & 0 deletions bigsmiles_gen/forcefield_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import dataclasses
from importlib.resources import files

from rdkit import Chem

_global_nonbonded_itp_file = None
_global_smarts_rule_file = None
_global_assignment_class = None


@dataclasses.dataclass
class FFParam:
mass: float
charge: float
sigma: float
epsilon: float
bond_type_name: str
bond_type_id: int = -1


class SMARTS_ASSIGNMENTS:
def _read_smarts_rules(self, filename):
if filename is None:
filename = files("bigsmiles_gen").joinpath("data", "opls.par")
self._type_dict = {}
self._type_dict_rev = {}
self._rule_dict = {}

opls_counter = 0
with open(filename, "r") as smarts_file:
for line in smarts_file:
line = line.strip()
if line and line[0] != "*":
ELE, SYMBOL, TYPE, RULE = line.split("|")
TYPE = TYPE.strip()
RULE = RULE.strip()
try:
type_id = self._type_dict[TYPE]
except KeyError:
type_id = opls_counter
opls_counter += 1
self._type_dict[TYPE] = type_id
self._type_dict_rev[type_id] = TYPE
self._rule_dict[RULE] = TYPE

def _read_nb_param(self, filename):
def assign_bond_index(type_param):
bond_dict = {}
bond_dict_rev = {}

bond_counter = 0
for name in type_param:
bond_name = type_param[name].bond_type_name
if bond_name not in bond_dict:
bond_dict[bond_name] = bond_counter
bond_dict_rev[bond_counter] = bond_name
bond_counter += 1
type_param[name].bond_type_id = bond_dict[type_param[name].bond_type_name]
return bond_dict, bond_dict_rev

self._type_param = {}

if filename is None:
filename = files("bigsmiles_gen").joinpath("data", "ffnonbonded.itp")
with open(filename, "r") as ff_file:
for line in ff_file:
if line and line[0] != "[" and line[0] != ";":
line = line.strip()
line = line.split()
if line[0] in self._type_dict:
bond_type_name = line[1]
mass = float(line[3])
charge = float(line[4])
sigma = float(line[6])
epsilon = float(line[7])
self._type_param[line[0]] = FFParam(
mass=mass,
charge=charge,
sigma=sigma,
epsilon=epsilon,
bond_type_name=bond_type_name,
)
bond_dict, bond_dict_rev = assign_bond_index(self._type_param)

def __init__(self, smarts_filename, nb_filename):
self._read_smarts_rules(smarts_filename)
self._read_nb_param(nb_filename)

def get_type(self, type):
try:
return self._type_dict_rev[type]
except KeyError:
pass
try:
return self._type_dict[type]
except KeyError:
pass
# Fallback, we generate a key error
return self._type_dict[type]

def get_ffparam(self, type):
return self._type_param[self.get_type(type)]

def get_type_assignments(self, mol):
match_dict = {}
for rule in self._rule_dict:
rule_mol = Chem.MolFromSmarts(rule)
matches = mol.GetSubstructMatches(rule_mol)
for match in matches:
if len(match) > 1:
RuntimeError("Match with more then atom, that doesn't make sense here.")
match = match[0]
try:
match_dict[match].append(rule)
except KeyError:
match_dict[match] = [rule]
if len(match_dict) != mol.GetNumAtoms():
raise RuntimeError("Not all atoms could be assigned")

for atom_num in match_dict:
final_match = match_dict[atom_num][0]
for match_rule in match_dict[atom_num]:
# Debatable rule, that longer SMARTS strings make a better assignment
if len(match_rule) > len(final_match):
final_match = match_rule
match_dict[atom_num] = final_match

final_dict = {}
for atom_num in match_dict:
final_dict[atom_num] = self.get_ffparam(
self.get_type(self._rule_dict[match_dict[atom_num]])
)
return final_dict

# graph = nx.Graph()
# for atom_num in match_dict:
# atom = mol.GetAtomWithIdx(atom_num)
# graph.add_node(
# atom_num,
# atomic=atom.GetAtomicNum(),
# param=opls_param[rule_dict[match_dict[atom_num]]],
# )
# for node in graph.nodes():
# atom = mol.GetAtomWithIdx(node)
# for bond in atom.GetBonds():
# graph.add_edge(
# bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond_type=bond.GetBondType()
# )

# return graph


def get_assignment_class(smarts_filename, nb_filename):
global _global_assignment_class, _global_nonbonded_itp_file, _global_smarts_rule_file
if (
_global_assignment_class is None
or smarts_filename != _global_nonbonded_itp_file
or nb_filename != _global_smarts_rule_file
):
_global_nonbonded_itp_file = nb_filename
_global_nonbonded_itp_file = smarts_filename
_global_assignment_class = SMARTS_ASSIGNMENTS(
_global_smarts_rule_file, _global_nonbonded_itp_file
)
return _global_assignment_class
17 changes: 17 additions & 0 deletions bigsmiles_gen/mol_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from rdkit.Chem import Descriptors as rdDescriptors
from rdkit.Chem import rdFingerprintGenerator

from .forcefield_helper import get_assignment_class

_RDKGEN = rdFingerprintGenerator.GetRDKitFPGenerator(fpSize=512)


Expand Down Expand Up @@ -204,3 +206,18 @@ def weight(self):
def add_graph_res(self, residues):
for n in self.graph:
self.graph.nodes[n]["res"] = residues.index(self.graph.nodes[n]["smiles"])

def get_forcefield_types(self, smarts_filename=None, nb_filename=None):
if not self.fully_generated:
raise RuntimeError(
"Forcefield assignment is only possible for fully generated molecules"
)

assigner = get_assignment_class(smarts_filename, nb_filename)
mol = self.mol
mol = Chem.AddHs(mol)
return assigner.get_type_assignments(mol), mol

@property
def forcefield_types(self):
return self.get_forcefield_types(smarts_filename=None, nb_filename=None)
3 changes: 2 additions & 1 deletion play.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,13 @@ def gen_calc_prob(big):
# bigA = "F{[<] [<]CC[>] [>]}|uniform(0, 20)|[H]"
# gen_calc_prob(bigA)

bigA = "CCOC(=S)S{[$] O([<|3|])(C([$])C[$]), [>]CCO[<|0 0 0 1 0 2|] ; [>][H] [$]}|poisson(900)|CCCC"
bigA = "CCOC{[$] O([<|3|])(C([$])C[$]), [>]CCO[<|0 0 0 1 0 2|] ; [>][H] [$]}|poisson(900)|CCCC"


mol = bigsmiles_gen.Molecule(bigA)
mol_gen = mol.generate()
print(mol_gen.smiles)
ffparam, mol = mol_gen.forcefield_types
molSize = (450, 150)
mc = Chem.Mol(mol_gen.mol.ToBinary())
drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
Expand Down
24 changes: 21 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
[build-system]
requires = ["setuptools>=41.0", "wheel>=0.33", "setuptools_scm[toml]>=6.0"]

# [tool.pytest.ini_options]
# testpaths = [
# "tests",
# ]
[build-system]
requires = ["setuptools >= 43", "wheel", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[project]
name = "bigsmiles_gen"
authors = [
{ name = "Ludwig Schneider", email = "[email protected]" },
]
license = { text = "GPL-3.0", files = ["LICENSE.md"] }
description = "Bigsmiles extension handling the generation of ensembles smiles strings."
requires-python = ">=3.9"
dependencies = ["scipy", "numpy", "networkx", "matplotlib"]
dynamic = ["version"]

[tool.setuptools.dynamic]
version = { attr = "bigsmiles_gen._version.version" }

[tool.setuptools_scm]
write_to = "bigsmiles_gen/_version.py"
version_file = "bigsmiles_gen/_version.py"

[tool.black]
line-length = 100
Expand All @@ -23,3 +38,6 @@ omit = [
"*/py/*",
"*/six.py",
]

[tool.setuptools.package-data]
bigsmiles_gen = ["data/ffbonded.itp", "data/ffnonbonded.itp", "data/opls.par"]
21 changes: 0 additions & 21 deletions setup.cfg

This file was deleted.

9 changes: 9 additions & 0 deletions tests/test_forcefield.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import bigsmiles_gen


def test_forcefield_assignment():
smi = "CCC(C){[>][<]CC([>])c1ccccc1[<]}|schulz_zimm(2000, 1800)|{[>][<]CC([>])C(=O)OC[<]}|schulz_zimm(1000, 900)|[H]"
mol = bigsmiles_gen.Molecule(smi)
mol_gen = mol.generate()
ffparam, mol = mol_gen.forcefield_types
assert len(ffparam) == mol.GetNumAtoms()
Loading