diff --git a/reacnetgenerator/_detect.py b/reacnetgenerator/_detect.py index 88dc50935..db89ef741 100644 --- a/reacnetgenerator/_detect.py +++ b/reacnetgenerator/_detect.py @@ -140,8 +140,15 @@ def _readstepfunc(self, item): pass def _connectmolecule(self, bond, level): + # return list([b''.join((listtobytes(mol), + # listtobytes(bondlist))) for mol, bondlist in zip(*dps(bond, level))]) + # int() because sometimes, the elements in bondlist are type numpy.int64 sometimes are int + # which cause the different bytes results from same bond-network. return list([b''.join((listtobytes(mol), - listtobytes(bondlist))) for mol, bondlist in zip(*dps(bond, level))]) + listtobytes([(int(i[0]), int(i[1])) + for i in bondlist]).replace(b'\n', b'\t'), + listtobytes([int(i[2]) for i in bondlist]) + )) for mol, bondlist in zip(*dps(bond, level))]) def _writemoleculetempfile(self, d): with WriteBuffer(tempfile.NamedTemporaryFile('wb', delete=False)) as f: @@ -328,7 +335,7 @@ def _readstepfunc(self, item): s = line.split() ss.append(list(map(float, s))) ss = np.array(ss) - if ss.shape[1]>2: + if ss.shape[1] > 2: xy = ss[0][2] xz = ss[1][2] yz = ss[2][2] @@ -339,10 +346,10 @@ def _readstepfunc(self, item): ylo = ss[1][0] - min(0., yz) yhi = ss[1][1] - max(0., yz) zlo = ss[2][0] - zhi = ss[2][1] + zhi = ss[2][1] boxsize = np.array([[xhi-xlo, 0., 0.], - [xy, yhi-ylo, 0.], - [xz, yz, zhi-zlo]]) + [xy, yhi-ylo, 0.], + [xz, yz, zhi-zlo]]) _, step_atoms = zip(*sorted(step_atoms, key=operator.itemgetter(0))) step_atoms = Atoms(step_atoms) bond, level = self._getbondfromcrd(step_atoms, boxsize) @@ -355,7 +362,8 @@ class _Detectxyz(_DetectCrd): """xyz file. Two frames are connected. Cell information must be inputed by user.""" def _readNfunc(self, f): - atomname_dict = dict(zip(self.atomname.tolist(), range(self.atomname.size))) + atomname_dict = dict( + zip(self.atomname.tolist(), range(self.atomname.size))) for index, line in enumerate(f): s = line.split() if index == 0: @@ -380,7 +388,8 @@ def _readstepfunc(self, item): for index, line in enumerate(lines): s = line.split() if index > 1: - step_atoms.append((index-1, Atom(s[0] ,tuple((float(x) for x in s[1:4]))))) + step_atoms.append( + (index-1, Atom(s[0], tuple((float(x) for x in s[1:4]))))) _, step_atoms = zip(*sorted(step_atoms, key=operator.itemgetter(0))) step_atoms = Atoms(step_atoms) bond, level = self._getbondfromcrd(step_atoms, boxsize) diff --git a/reacnetgenerator/_mergeiso.py b/reacnetgenerator/_mergeiso.py new file mode 100644 index 000000000..8eca29172 --- /dev/null +++ b/reacnetgenerator/_mergeiso.py @@ -0,0 +1,45 @@ +from tqdm import tqdm +import numpy as np + +from .utils import bytestolist, listtobytes, SharedRNGData + + +class _mergeISO(SharedRNGData): + def __init__(self, rng): + SharedRNGData.__init__( + self, rng, ['miso', 'moleculetempfilename'], ['temp1it']) + + def merge(self): + if self.miso > 0: + self._mergeISO() + self.returnkeys() + + def _mergeISO(self): + with open(self.moleculetempfilename, 'rb') as ft: + items = ft.readlines() + items = [[items[i], items[i+1], items[i+2]] + for i in range(0, len(items), 3)] + new_items = [] + _oldbitem = b'' + _oldbbond = b'0' + _oldindex = [] + _oldfreq = 0 + for _bitem, _bbond, _bindex in tqdm(sorted(items, key=lambda x: (x[0], x[1].split()[0])), desc='Merge isomers:'): + _index = bytestolist(_bindex) + _freq = len(_index) + if (_bitem == _oldbitem) and ((_bbond.split()[0] == _oldbbond.split()[0]) or (self.miso > 1)): + _oldindex = np.hstack((_oldindex, _index)) + else: + if _oldbitem: + new_items.append( + [_oldbitem, _oldbbond, listtobytes(_oldindex)]) + _oldbitem = _bitem + _oldindex = _index + if _freq > _oldfreq: + _oldbbond = _bbond + new_items.append([_oldbitem, _oldbbond, listtobytes(_oldindex)]) + new_items.sort(key=lambda x: len(x[0])) + self.temp1it = len(new_items) + with open(self.moleculetempfilename, 'wb') as ft: + for item in new_items: + [ft.write(i) for i in item] diff --git a/reacnetgenerator/_path.py b/reacnetgenerator/_path.py index b57a00857..a3afa97bb 100644 --- a/reacnetgenerator/_path.py +++ b/reacnetgenerator/_path.py @@ -19,6 +19,7 @@ import itertools from abc import ABCMeta, abstractmethod from collections import Counter, defaultdict +from re import S import networkx as nx import networkx.algorithms.isomorphism as iso @@ -130,7 +131,11 @@ def convertSMILES(self, atoms, bonds): def _getatomsandbonds(self, line): atoms = np.array(bytestolist(line[0]), dtype=int) - bonds = bytestolist(line[1]) + # bonds = bytestolist(line[1]) + s = line[1].split() + pairs = bytestolist(s[0]) + levels = bytestolist(s[1]) + bonds = [[*pair, level] for pair, level in zip(pairs, levels)] return atoms, bonds diff --git a/reacnetgenerator/commandline.py b/reacnetgenerator/commandline.py index 35c1fa366..a310a3301 100644 --- a/reacnetgenerator/commandline.py +++ b/reacnetgenerator/commandline.py @@ -15,6 +15,8 @@ def _commandline(): parser.add_argument( '--nohmm', help='Process trajectory without Hidden Markov Model (HMM)', action="store_true") + parser.add_argument( + '--miso', help='Merge the isomers',type=int, default=0) parser.add_argument( '--dump', help='Process the LAMMPS dump file', action="store_true") parser.add_argument( @@ -44,6 +46,7 @@ def _commandline(): import numpy as np ReacNetGenerator( inputfilename=args.inputfilename, atomname=args.atomname, + miso=args.miso, runHMM=not args.nohmm, inputfiletype=('lammpsdumpfile' if args.dump else args.type), nproc=args.nproc, selectatoms=args.selectatoms, diff --git a/reacnetgenerator/dps.pyx b/reacnetgenerator/dps.pyx index f4541416a..c4a0d395f 100644 --- a/reacnetgenerator/dps.pyx +++ b/reacnetgenerator/dps.pyx @@ -37,7 +37,8 @@ def dps(bonds, levels): for b, l in zip(bonds[s], levels[s]): b_c = b if visited[b_c]==0: - bond.append((s, b, l) if i < b else (b, s, l)) + # bond.append((s, b, l) if i < b else (b, s, l)) + bond.append((s, b, l) if s < b else (b, s, l)) st.push(b_c) visited[s]=1 mol.sort() diff --git a/reacnetgenerator/reacnetgen.py b/reacnetgenerator/reacnetgen.py index e6a9da126..4be61da70 100644 --- a/reacnetgenerator/reacnetgen.py +++ b/reacnetgenerator/reacnetgen.py @@ -53,6 +53,7 @@ from . import __version__, __date__, __update__ from ._detect import _Detect from ._draw import _DrawNetwork +from ._mergeiso import _mergeISO from ._hmmfilter import _HMMFilter from ._matrix import _GenerateMatrix from ._path import _CollectPaths @@ -83,6 +84,11 @@ class ReacNetGenerator: runHMM: bool, optional, default: True Process trajectory with Hidden Markov Model (HMM) or not. If the user find too many species are filtered, they can turn off this option. + miso: int, optional, default: 0 + Merge the isomers and the highest frequency is used as the representative. 0, off + two available levels: + 1, merge the isomers with same atoms and same bond-network but different bond levels; + 2, merge the isomers with same atoms with different bond-network. pbc: bool, optional, default: True Use periodic boundary conditions (PBC) or not. cell: (3,3) array_like or (3,) array_like or (9,) array_like, optional, default: None @@ -125,6 +131,7 @@ def __init__(self, **kwargs): "nproc": cpu_count(), "pos": {}, "pbc": True, "split": 1, "n_searchspecies": 2, "node_size": 200, "font_size": 6, "widthcoefficient": 1, "maxspecies": 20, "stepinterval": 1, "nolabel": False, "printfiltersignal": False, "showid": True, "runHMM": True, "SMILES": True, + "miso": 0, "getoriginfile": False, "needprintspecies": True, "urls": [], "matrix_size":100, } @@ -179,6 +186,7 @@ def runanddraw(self, run=True, draw=True, report=True): processthing.append(self.Status.DOWNLOAD) processthing.extend(( self.Status.DETECT, + self.Status.MISO, self.Status.HMM, self.Status.PATH, self.Status.MATRIX, @@ -201,6 +209,7 @@ def run(self): processthing.append(self.Status.DOWNLOAD) processthing.extend(( self.Status.DETECT, + self.Status.MISO, self.Status.HMM, self.Status.PATH, self.Status.MATRIX, @@ -233,6 +242,7 @@ class Status(Enum): - DOWNLOAD: Download trajectory from urls - DETECT: Read bond information and detect molecules - HMM: HMM filter + - MISO: Merge isomers - PATH: Indentify isomers and collect reaction paths - MATRIX: Reaction matrix generation - NETWORK: Draw reaction network @@ -241,6 +251,7 @@ class Status(Enum): INIT = "Init" DETECT = "Read bond information and detect molecules" + MISO = "Merge isomers" HMM = "HMM filter" PATH = "Indentify isomers and collect reaction paths" MATRIX = "Reaction matrix generation" @@ -264,6 +275,8 @@ def _process(self, steps): for i, runstep in enumerate(steps, 1): if runstep == self.Status.DETECT: _Detect.gettype(self).detect() + elif runstep == self.Status.MISO: + _mergeISO(self).merge() elif runstep == self.Status.HMM: _HMMFilter(self).filter() elif runstep == self.Status.PATH: