 from ..topology.LAMMPSParser import DATAParser
 from ..exceptions import NoDataError
 from . import base
+import warnings


 btype_sections = {'bond': 'Bonds', 'angle': 'Angles',
                   'dihedral': 'Dihedrals', 'improper': 'Impropers'}
@@ -458,12 +459,50 @@ class DumpReader(base.ReaderBase):
     """Reads the default `LAMMPS dump format
     <https://docs.lammps.org/dump.html>`__

-    Supports coordinates in various LAMMPS coordinate conventions and both
-    orthogonal and triclinic simulation box dimensions (for more details see
-    `documentation <https://docs.lammps.org/Howto_triclinic.html>`__). In
-    either case, MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*,
-    *gamma*)`` to represent the unit cell. Lengths *A*, *B*, *C* are in the
-    MDAnalysis length unit (Å), and angles are in degrees.
+    Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
+    "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
+    conventions (see https://docs.lammps.org/dump.html for more details).
+    If `lammps_coordinate_convention='auto'` (default),
+    one will be guessed. Guessing checks whether the coordinates fit each
+    convention in the order "unscaled", "scaled", "unwrapped",
+    "scaled_unwrapped" and whichever set of coordinates is detected first will
+    be used. If coordinates are given in the scaled coordinate convention
+    (xs,ys,zs) or scaled unwrapped coordinate convention (xsu,ysu,zsu) they
+    will automatically be converted from their scaled/fractional representation
+    to their real values.
+
+    Supports both orthogonal and triclinic simulation box dimensions (for more
+    details see https://docs.lammps.org/Howto_triclinic.html). In either case,
+    MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, *gamma*)``
+    to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis
+    length unit (Å), and angles are in degrees.
+
+    By using the keyword `additional_columns`, you can specify arbitrary data
+    to be read. The keyword expects a list of the names of the columns or
+    `True` to read all additional columns. The results are saved to
+    :attr:`Timestep.data`. For example, if your LAMMPS dump looks like this
+
+    .. code-block::
+
+        ITEM: ATOMS id x y z q l
+        1 2.84 8.17 -25 0.00258855 1.1
+        2 7.1 8.17 -25 6.91952e-05 1.2
+
+    Then you may parse the additional columns `q` and `l` via:
+
+    .. code-block:: python
+
+        u = mda.Universe('structure.data', 'traj.lammpsdump',
+                         additional_columns=['q', 'l'])
+
+    The additional data is then available for each time step via:
+
+    .. code-block:: python
+
+        for ts in u.trajectory:
+            charges = ts.data['q']  # Access additional data, sorted by the id
+            ls = ts.data['l']
+            ...

     Parameters
     ----------
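As a usage sketch of the convention handling documented in the hunk above (the file names here are hypothetical; the keyword and its allowed values come from `_conventions` further down in this diff), explicitly forcing the "unwrapped" convention instead of the default "auto" guess could look like:

    import MDAnalysis as mda

    # Hypothetical topology/trajectory pair; any LAMMPS data + dump files work.
    # Forcing "unwrapped" tells the reader to take the xu, yu, zu columns
    # directly instead of guessing the convention from the available columns.
    u = mda.Universe("polymer.data", "polymer.lammpsdump",
                     format="LAMMPSDUMP",
                     lammps_coordinate_convention="unwrapped")

    print(u.trajectory.ts.positions[:5])  # Cartesian coordinates in Å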
@@ -497,6 +536,9 @@ class DumpReader(base.ReaderBase):
     **kwargs
         Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase`

+    .. versionchanged:: 2.7.0
+       Reading of arbitrary, additional columns is now supported.
+       (Issue #3608)
     .. versionchanged:: 2.4.0
        Now imports velocities and forces, translates the box to the origin,
        and optionally unwraps trajectories with image flags upon loading.
@@ -510,18 +552,23 @@ class DumpReader(base.ReaderBase):
     format = 'LAMMPSDUMP'
     _conventions = ["auto", "unscaled", "scaled", "unwrapped",
                     "scaled_unwrapped"]
+
     _coordtype_column_names = {
         "unscaled": ["x", "y", "z"],
         "scaled": ["xs", "ys", "zs"],
         "unwrapped": ["xu", "yu", "zu"],
         "scaled_unwrapped": ["xsu", "ysu", "zsu"]
     }

+    _parsable_columns = ["id", "vx", "vy", "vz", "fx", "fy", "fz"]
+    for key in _coordtype_column_names.keys():
+        _parsable_columns += _coordtype_column_names[key]
+
     @store_init_arguments
-    def __init__(self, filename,
+    def __init__(self, filename,
                  lammps_coordinate_convention="auto",
                  unwrap_images=False,
-                 **kwargs):
+                 additional_columns=None, **kwargs):
         super(DumpReader, self).__init__(filename, **kwargs)

         root, ext = os.path.splitext(self.filename)
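The class-level loop above simply flattens every coordinate column name onto the id/velocity/force columns, giving the full set of columns the reader already consumes itself. Rebuilt as a standalone sketch, using only the dictionaries shown in this hunk:

    # Rebuilding _parsable_columns outside the class, from the same data as above.
    coordtype_column_names = {
        "unscaled": ["x", "y", "z"],
        "scaled": ["xs", "ys", "zs"],
        "unwrapped": ["xu", "yu", "zu"],
        "scaled_unwrapped": ["xsu", "ysu", "zsu"],
    }

    parsable_columns = ["id", "vx", "vy", "vz", "fx", "fy", "fz"]
    for cols in coordtype_column_names.values():
        parsable_columns += cols

    print(parsable_columns)
    # ['id', 'vx', 'vy', 'vz', 'fx', 'fy', 'fz',
    #  'x', 'y', 'z', 'xs', 'ys', 'zs', 'xu', 'yu', 'zu', 'xsu', 'ysu', 'zsu']

With `additional_columns=True`, any dump column not in this list later ends up in `ts.data` (see the set difference in `_read_next_timestep` further down in the diff).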
@@ -536,6 +583,16 @@ def __init__(self, filename,

         self._unwrap = unwrap_images

+        if (util.iterable(additional_columns)
+                or additional_columns is None
+                or additional_columns is True):
+            self._additional_columns = additional_columns
+        else:
+            raise ValueError(f"additional_columns={additional_columns} "
+                             "is not a valid option. Please provide an "
+                             "iterable containing the additional "
+                             "column headers.")
+
         self._cache = {}

         self._reopen()
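Besides an explicit list of column names, the validation above also lets `True` through, meaning "read every column the reader does not already handle". A hypothetical call (file names invented) would be:

    import MDAnalysis as mda

    # Hypothetical files; with additional_columns=True every dump column that
    # is not an id/coordinate/velocity/force column is collected into ts.data.
    u = mda.Universe("structure.data", "traj.lammpsdump",
                     additional_columns=True)

    for ts in u.trajectory:
        # For the dump header shown in the docstring example this would
        # include 'q' and 'l'.
        print(ts.data.keys())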
@@ -681,6 +738,25 @@ def _read_next_timestep(self):
         coord_cols.extend(image_cols)

         ids = "id" in attr_to_col_ix
+
+        # Create the data arrays for additional attributes which will be saved
+        # under ts.data
+        if self._additional_columns is True:
+            # Parse every column that is not already parsed
+            # elsewhere (total \ parsable)
+            additional_keys = set(attrs).difference(self._parsable_columns)
+        elif self._additional_columns:
+            if not all([key in attrs for key in self._additional_columns]):
+                warnings.warn("Some of the additional columns are not present "
+                              "in the file, they will be ignored")
+            additional_keys = \
+                [key for key in self._additional_columns if key in attrs]
+        else:
+            additional_keys = []
+        for key in additional_keys:
+            ts.data[key] = np.empty(self.n_atoms)
+
+        # Parse all the atoms
         for i in range(self.n_atoms):
             fields = f.readline().split()
             if ids:
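For `additional_columns=True`, the keys filled above are simply the set difference between the dump's column names and `_parsable_columns`. Replayed for the header used in the docstring example:

    # Column names as they appear after "ITEM: ATOMS" in the docstring example.
    attrs = ["id", "x", "y", "z", "q", "l"]

    parsable_columns = {"id", "vx", "vy", "vz", "fx", "fy", "fz",
                        "x", "y", "z", "xs", "ys", "zs",
                        "xu", "yu", "zu", "xsu", "ysu", "zsu"}

    # total \ parsable, the same expression used in the hunk above
    additional_keys = set(attrs).difference(parsable_columns)
    print(additional_keys)  # {'q', 'l'} (a set, so unordered)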
@@ -701,12 +777,22 @@ def _read_next_timestep(self):
             if self._has_forces:
                 ts.forces[i] = [fields[dim] for dim in force_cols]

+            # Collect additional cols
+            for attribute_key in additional_keys:
+                ts.data[attribute_key][i] = \
+                    fields[attr_to_col_ix[attribute_key]]
+
         order = np.argsort(indices)
         ts.positions = ts.positions[order]
         if self._has_vels:
             ts.velocities = ts.velocities[order]
         if self._has_forces:
             ts.forces = ts.forces[order]
+
+        # Also need to sort the additional keys
+        for attribute_key in additional_keys:
+            ts.data[attribute_key] = ts.data[attribute_key][order]
+
         if (self.lammps_coordinate_convention.startswith("scaled")):
             # if coordinates are given in scaled format, undo that
             ts.positions = distances.transform_StoR(ts.positions,
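`distances.transform_StoR` turns the fractional (scaled) positions back into Cartesian Å using the box read from the dump. For an orthogonal box this reduces to scaling by the box lengths, which the following numpy-only sketch illustrates (triclinic boxes additionally contribute tilt terms):

    import numpy as np

    # Two atoms in fractional coordinates (xs, ys, zs) and an orthogonal box
    # given as (lx, ly, lz, alpha, beta, gamma) with all angles at 90 degrees.
    scaled = np.array([[0.25, 0.50, 0.75],
                       [0.10, 0.20, 0.30]])
    box = np.array([20.0, 30.0, 40.0, 90.0, 90.0, 90.0])

    # For an orthogonal cell, scaled -> real is a per-axis rescale by the box
    # lengths; this mirrors what transform_StoR does in that special case.
    real = scaled * box[:3]
    print(real)
    # [[ 5. 15. 30.]
    #  [ 2.  6. 12.]]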