Skip to content

Commit

Permalink
feat: Add a feature of merge isomers (#1563)
Browse files Browse the repository at this point in the history
* add a feature of merge isomers

* Update _mergeiso.py

remove code for debug

* format changed codes

* merge same code

Co-authored-by: Jinzhe Zeng <[email protected]>
  • Loading branch information
yohsama and njzjz authored Apr 23, 2022
1 parent 74ec3f1 commit 324d027
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 9 deletions.
23 changes: 16 additions & 7 deletions reacnetgenerator/_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,15 @@ def _readstepfunc(self, item):
pass

def _connectmolecule(self, bond, level):
# return list([b''.join((listtobytes(mol),
# listtobytes(bondlist))) for mol, bondlist in zip(*dps(bond, level))])
# int() because sometimes, the elements in bondlist are type numpy.int64 sometimes are int
# which cause the different bytes results from same bond-network.
return list([b''.join((listtobytes(mol),
listtobytes(bondlist))) for mol, bondlist in zip(*dps(bond, level))])
listtobytes([(int(i[0]), int(i[1]))
for i in bondlist]).replace(b'\n', b'\t'),
listtobytes([int(i[2]) for i in bondlist])
)) for mol, bondlist in zip(*dps(bond, level))])

def _writemoleculetempfile(self, d):
with WriteBuffer(tempfile.NamedTemporaryFile('wb', delete=False)) as f:
Expand Down Expand Up @@ -328,7 +335,7 @@ def _readstepfunc(self, item):
s = line.split()
ss.append(list(map(float, s)))
ss = np.array(ss)
if ss.shape[1]>2:
if ss.shape[1] > 2:
xy = ss[0][2]
xz = ss[1][2]
yz = ss[2][2]
Expand All @@ -339,10 +346,10 @@ def _readstepfunc(self, item):
ylo = ss[1][0] - min(0., yz)
yhi = ss[1][1] - max(0., yz)
zlo = ss[2][0]
zhi = ss[2][1]
zhi = ss[2][1]
boxsize = np.array([[xhi-xlo, 0., 0.],
[xy, yhi-ylo, 0.],
[xz, yz, zhi-zlo]])
[xy, yhi-ylo, 0.],
[xz, yz, zhi-zlo]])
_, step_atoms = zip(*sorted(step_atoms, key=operator.itemgetter(0)))
step_atoms = Atoms(step_atoms)
bond, level = self._getbondfromcrd(step_atoms, boxsize)
Expand All @@ -355,7 +362,8 @@ class _Detectxyz(_DetectCrd):
"""xyz file. Two frames are connected. Cell information must be inputed by user."""

def _readNfunc(self, f):
atomname_dict = dict(zip(self.atomname.tolist(), range(self.atomname.size)))
atomname_dict = dict(
zip(self.atomname.tolist(), range(self.atomname.size)))
for index, line in enumerate(f):
s = line.split()
if index == 0:
Expand All @@ -380,7 +388,8 @@ def _readstepfunc(self, item):
for index, line in enumerate(lines):
s = line.split()
if index > 1:
step_atoms.append((index-1, Atom(s[0] ,tuple((float(x) for x in s[1:4])))))
step_atoms.append(
(index-1, Atom(s[0], tuple((float(x) for x in s[1:4])))))
_, step_atoms = zip(*sorted(step_atoms, key=operator.itemgetter(0)))
step_atoms = Atoms(step_atoms)
bond, level = self._getbondfromcrd(step_atoms, boxsize)
Expand Down
45 changes: 45 additions & 0 deletions reacnetgenerator/_mergeiso.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from tqdm import tqdm
import numpy as np

from .utils import bytestolist, listtobytes, SharedRNGData


class _mergeISO(SharedRNGData):
def __init__(self, rng):
SharedRNGData.__init__(
self, rng, ['miso', 'moleculetempfilename'], ['temp1it'])

def merge(self):
if self.miso > 0:
self._mergeISO()
self.returnkeys()

def _mergeISO(self):
with open(self.moleculetempfilename, 'rb') as ft:
items = ft.readlines()
items = [[items[i], items[i+1], items[i+2]]
for i in range(0, len(items), 3)]
new_items = []
_oldbitem = b''
_oldbbond = b'0'
_oldindex = []
_oldfreq = 0
for _bitem, _bbond, _bindex in tqdm(sorted(items, key=lambda x: (x[0], x[1].split()[0])), desc='Merge isomers:'):
_index = bytestolist(_bindex)
_freq = len(_index)
if (_bitem == _oldbitem) and ((_bbond.split()[0] == _oldbbond.split()[0]) or (self.miso > 1)):
_oldindex = np.hstack((_oldindex, _index))
else:
if _oldbitem:
new_items.append(
[_oldbitem, _oldbbond, listtobytes(_oldindex)])
_oldbitem = _bitem
_oldindex = _index
if _freq > _oldfreq:
_oldbbond = _bbond
new_items.append([_oldbitem, _oldbbond, listtobytes(_oldindex)])
new_items.sort(key=lambda x: len(x[0]))
self.temp1it = len(new_items)
with open(self.moleculetempfilename, 'wb') as ft:
for item in new_items:
[ft.write(i) for i in item]
7 changes: 6 additions & 1 deletion reacnetgenerator/_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import itertools
from abc import ABCMeta, abstractmethod
from collections import Counter, defaultdict
from re import S

import networkx as nx
import networkx.algorithms.isomorphism as iso
Expand Down Expand Up @@ -130,7 +131,11 @@ def convertSMILES(self, atoms, bonds):

def _getatomsandbonds(self, line):
atoms = np.array(bytestolist(line[0]), dtype=int)
bonds = bytestolist(line[1])
# bonds = bytestolist(line[1])
s = line[1].split()
pairs = bytestolist(s[0])
levels = bytestolist(s[1])
bonds = [[*pair, level] for pair, level in zip(pairs, levels)]
return atoms, bonds


Expand Down
3 changes: 3 additions & 0 deletions reacnetgenerator/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def _commandline():
parser.add_argument(
'--nohmm', help='Process trajectory without Hidden Markov Model (HMM)',
action="store_true")
parser.add_argument(
'--miso', help='Merge the isomers',type=int, default=0)
parser.add_argument(
'--dump', help='Process the LAMMPS dump file', action="store_true")
parser.add_argument(
Expand Down Expand Up @@ -44,6 +46,7 @@ def _commandline():
import numpy as np
ReacNetGenerator(
inputfilename=args.inputfilename, atomname=args.atomname,
miso=args.miso,
runHMM=not args.nohmm,
inputfiletype=('lammpsdumpfile' if args.dump else args.type),
nproc=args.nproc, selectatoms=args.selectatoms,
Expand Down
3 changes: 2 additions & 1 deletion reacnetgenerator/dps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def dps(bonds, levels):
for b, l in zip(bonds[s], levels[s]):
b_c = b
if visited[b_c]==0:
bond.append((s, b, l) if i < b else (b, s, l))
# bond.append((s, b, l) if i < b else (b, s, l))
bond.append((s, b, l) if s < b else (b, s, l))
st.push(b_c)
visited[s]=1
mol.sort()
Expand Down
13 changes: 13 additions & 0 deletions reacnetgenerator/reacnetgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from . import __version__, __date__, __update__
from ._detect import _Detect
from ._draw import _DrawNetwork
from ._mergeiso import _mergeISO
from ._hmmfilter import _HMMFilter
from ._matrix import _GenerateMatrix
from ._path import _CollectPaths
Expand Down Expand Up @@ -83,6 +84,11 @@ class ReacNetGenerator:
runHMM: bool, optional, default: True
Process trajectory with Hidden Markov Model (HMM) or not. If the user find too many species
are filtered, they can turn off this option.
miso: int, optional, default: 0
Merge the isomers and the highest frequency is used as the representative. 0, off
two available levels:
1, merge the isomers with same atoms and same bond-network but different bond levels;
2, merge the isomers with same atoms with different bond-network.
pbc: bool, optional, default: True
Use periodic boundary conditions (PBC) or not.
cell: (3,3) array_like or (3,) array_like or (9,) array_like, optional, default: None
Expand Down Expand Up @@ -125,6 +131,7 @@ def __init__(self, **kwargs):
"nproc": cpu_count(), "pos": {}, "pbc": True, "split": 1, "n_searchspecies": 2,
"node_size": 200, "font_size": 6, "widthcoefficient": 1, "maxspecies": 20, "stepinterval": 1,
"nolabel": False, "printfiltersignal": False, "showid": True, "runHMM": True, "SMILES": True,
"miso": 0,
"getoriginfile": False, "needprintspecies": True, "urls": [],
"matrix_size":100,
}
Expand Down Expand Up @@ -179,6 +186,7 @@ def runanddraw(self, run=True, draw=True, report=True):
processthing.append(self.Status.DOWNLOAD)
processthing.extend((
self.Status.DETECT,
self.Status.MISO,
self.Status.HMM,
self.Status.PATH,
self.Status.MATRIX,
Expand All @@ -201,6 +209,7 @@ def run(self):
processthing.append(self.Status.DOWNLOAD)
processthing.extend((
self.Status.DETECT,
self.Status.MISO,
self.Status.HMM,
self.Status.PATH,
self.Status.MATRIX,
Expand Down Expand Up @@ -233,6 +242,7 @@ class Status(Enum):
- DOWNLOAD: Download trajectory from urls
- DETECT: Read bond information and detect molecules
- HMM: HMM filter
- MISO: Merge isomers
- PATH: Indentify isomers and collect reaction paths
- MATRIX: Reaction matrix generation
- NETWORK: Draw reaction network
Expand All @@ -241,6 +251,7 @@ class Status(Enum):

INIT = "Init"
DETECT = "Read bond information and detect molecules"
MISO = "Merge isomers"
HMM = "HMM filter"
PATH = "Indentify isomers and collect reaction paths"
MATRIX = "Reaction matrix generation"
Expand All @@ -264,6 +275,8 @@ def _process(self, steps):
for i, runstep in enumerate(steps, 1):
if runstep == self.Status.DETECT:
_Detect.gettype(self).detect()
elif runstep == self.Status.MISO:
_mergeISO(self).merge()
elif runstep == self.Status.HMM:
_HMMFilter(self).filter()
elif runstep == self.Status.PATH:
Expand Down

0 comments on commit 324d027

Please sign in to comment.