Skip to content

Commit 7190375

Browse files
authored
Merge pull request #24 from Sparks29032/loadmetadata
Allow conversion of PDF data to json
2 parents 86f7fc5 + 6116cf3 commit 7190375

15 files changed

+423
-11
lines changed

Diff for: CHANGELOG.md

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
# Release notes
22

3-
## Version 3.2.0 – 2023-8-**
3+
## Version 3.2.0 – 2023-08-**
44

55
### Added
66

77
- CI Coverage.
88
- New tests for loadData function.
9-
10-
### Changed
11-
129
- loadData function now toggleable. Can return either (a) data read from data blocks or (b) header
1310
information stored above the data block.
14-
- Exclude wx from tests.
1511

1612
### Removed
1713

Diff for: conda-recipe/run_test.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#!/usr/bin/env python
22

3+
import sys
4+
import pathlib
5+
sys.path.append((pathlib.Path.cwd().parent.absolute() / "src").as_posix())
6+
37
import diffpy.utils.tests
48
assert diffpy.utils.tests.test().wasSuccessful()

Diff for: src/diffpy/utils/parsers/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
"""
1818

1919
from .loaddata import loadData
20+
from .serialization import serialize_data, deserialize_data
2021
from .resample import resample
2122

2223
# silence the pyflakes syntax checker
2324
assert loadData or resample or True
25+
assert serialize_data or deserialize_data or True
2426

2527
# End of file

Diff for: src/diffpy/utils/parsers/custom_exceptions.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env python
2+
##############################################################################
3+
#
4+
# diffpy.utils by DANSE Diffraction group
5+
# Simon J. L. Billinge
6+
# (c) 2010 The Trustees of Columbia University
7+
# in the City of New York. All rights reserved.
8+
#
9+
# File coded by:
10+
#
11+
# See AUTHORS.txt for a list of people who contributed.
12+
# See LICENSE_DANSE.txt for license information.
13+
#
14+
##############################################################################
15+
16+
class UnsupportedTypeError(Exception):
    """Raised when a file type is not supported by our parsers.

    file -- file triggering the error.
    supported_types -- list of supported file extensions (e.g. ['.json']).
    message -- optional custom message; overrides the generated one.
    """

    def __init__(self, file, supported_types=None, message=None):
        if message is None:
            message = f"The file {file} is not supported."
            if supported_types is not None:
                # e.g. " Supported file types include: .json, .yaml."
                message += " Supported file types include: " + ", ".join(supported_types) + "."
        # Bug fix: previously self.message was only assigned on the
        # message-is-None branch, so passing a custom message made
        # super().__init__(self.message) raise AttributeError.
        # Always store the final message before delegating.
        self.message = message
        super().__init__(self.message)
33+
34+
35+
class ImproperSizeError(Exception):
    """Raised when the size of an object does not match expectations.

    bad_object -- object with improper size.
    message -- optional custom message; overrides the generated one.
    """

    def __init__(self, bad_object, message=None):
        if message is None:
            message = f"The size of {bad_object} is different than expected."
        # Bug fix: previously self.message was unset when a custom message
        # was supplied, so super().__init__(self.message) raised
        # AttributeError. Always store the final message.
        self.message = message
        super().__init__(self.message)

Diff for: src/diffpy/utils/parsers/loaddata.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwar
2121
2222
The data block is identified as the first matrix block of at least minrows rows
2323
and constant number of columns. This seems to work for most of the datafiles including
24-
those generated by PDFGetX2.
24+
those generated by diffpy programs.
2525
2626
filename -- name of the file we want to load data from.
2727
minrows -- minimum number of rows in the first data block.
@@ -51,8 +51,8 @@ def loadData(filename, minrows=10, headers=False, hdel='=', hignore=None, **kwar
5151
Note transposing the loaded array as loadData(FILENAME).T
5252
has the same effect.
5353
54-
Return a numpy array of the data. If headers enabled, instead returns a
55-
dictionary of parameters read from the header.
54+
Return a numpy array of the data (data_block). If headers enabled, instead returns a
55+
dictionary of parameters read from the header (hddata).
5656
"""
5757
from numpy import array, loadtxt
5858
# for storing header data
@@ -145,14 +145,14 @@ def countcolumnsvalues(line):
145145
# Return an empty array when no data found.
146146
# loadtxt would otherwise raise an exception on loading from EOF.
147147
if start is None:
148-
rv = array([], dtype=float)
148+
data_block = array([], dtype=float)
149149
else:
150150
fid.seek(start)
151151
# always use usecols argument so that loadtxt does not crash
152152
# in case of trailing delimiters.
153153
kwargs.setdefault('usecols', list(range(ncvblock[0])))
154-
rv = loadtxt(fid, **kwargs)
155-
return rv
154+
data_block = loadtxt(fid, **kwargs)
155+
return data_block
156156

157157

158158
class TextDataLoader(object):

Diff for: src/diffpy/utils/parsers/serialization.py

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#!/usr/bin/env python
2+
##############################################################################
3+
#
4+
# diffpy.utils by DANSE Diffraction group
5+
# Simon J. L. Billinge
6+
# (c) 2010 The Trustees of Columbia University
7+
# in the City of New York. All rights reserved.
8+
#
9+
# File coded by:
10+
#
11+
# See AUTHORS.txt for a list of people who contributed.
12+
# See LICENSE_DANSE.txt for license information.
13+
#
14+
##############################################################################
15+
16+
import pathlib
17+
import json
18+
19+
from .custom_exceptions import UnsupportedTypeError, ImproperSizeError
20+
21+
# FIXME: add support for yaml, xml
22+
supported_formats = ['.json']
23+
24+
25+
def serialize_data(filename, hdata: dict, data_table: list, show_path=True, dt_colnames=None, serial_file=None):
    """Serialize file data into a dictionary. Can also save the dictionary into a serial language file.
    Dictionary is formatted as {filename: data}.

    Requires hdata and data_table generated from loadData.

    filename -- name of the file whose data is being serialized.
    hdata -- dictionary of PDF metadata generated by loadData.
    data_table -- list storing data parsed by loadData.
                  NOTE(review): column extraction below uses data_table[:, idx],
                  which requires a numpy-style 2D array despite the `list` hint
                  -- confirm against loadData's return type.
    dt_colnames -- list containing names of each column in data_table. Every name
                   will be put into the dictionary as a key whose value is that
                   column of data_table (stored as a list). Put None for columns
                   without names. If dt_colnames has fewer non-None entries than
                   data_table has columns, the pair {'data table': data_table} is
                   also put in the dictionary. (Default None: only the entry
                   {'data table': data_table} is added.)
    show_path -- include a path element in the database entry (default True).
                 If 'path' is not included in hdata, extract path from filename.
    serial_file -- serial language file to dump the dictionary into.

    Raises ImproperSizeError when dt_colnames has more entries than data_table
    has columns, and UnsupportedTypeError when serial_file has an extension not
    in supported_formats.

    Returns the dictionary loaded from/into the updated database file.
    """

    # compile data_table and hdata together
    data = {}

    # handle getting name of file for variety of filename types
    abs_path = pathlib.Path(filename).resolve()
    # add path to start of data if requested
    if show_path and 'path' not in hdata.keys():
        data.update({'path': abs_path.as_posix()})
    # title the entry with name of file (taken from end of path)
    title = abs_path.name

    # first add named columns in dt_colnames
    named_columns = 0
    max_columns = 1  # higher than named_columns to trigger 'data table' entry
    if dt_colnames is not None:
        num_columns = [len(row) for row in data_table]
        max_columns = max(num_columns)
        num_col_names = len(dt_colnames)
        if max_columns < num_col_names:  # assume numpy.loadtxt gives non-irregular array
            raise ImproperSizeError("More entries in dt_colnames than columns in data_table.")
        for idx in range(num_col_names):
            colname = dt_colnames[idx]
            if colname is not None:
                data.update({colname: list(data_table[:, idx])})
                named_columns += 1

    # second add data in hdata dict
    data.update(hdata)

    # finally add data_table as an entry named 'data table' if not all columns were parsed
    if named_columns < max_columns:
        if 'data table' not in data.keys():
            data.update({'data table': data_table})
        else:  # if 'data table' is already a key, keep adding primes to the end
            dt_name = 'data table'
            while dt_name in data.keys():
                dt_name += " prime"
            data.update({dt_name: data_table})

    # generate the dictionary entry keyed by file name
    entry = {title: data}

    # no save requested: return the single entry
    if serial_file is None:
        return entry

    # saving/updating file: check if supported type first
    sf = pathlib.Path(serial_file)
    sf_name = sf.name
    extension = sf.suffix
    if extension not in supported_formats:
        raise UnsupportedTypeError(sf_name, supported_formats)

    # Bug fix: the original probed existence with a bare open() whose handle
    # was never closed, leaking a file descriptor. pathlib answers the same
    # question without opening the file.
    existing = sf.is_file()

    # json
    if extension == '.json':
        if not existing:
            # dump into a fresh file
            file_data = entry  # for return
            with open(serial_file, 'w') as jsonfile:
                json.dump(file_data, jsonfile, indent=2)
        else:
            # merge the new entry into the existing database, then rewrite
            with open(serial_file, 'r') as json_read:
                file_data = json.load(json_read)
            file_data.update(entry)
            with open(serial_file, 'w') as json_write:
                json.dump(file_data, json_write, indent=2)

    return file_data
129+
130+
131+
def deserialize_data(filename):
    """Load a dictionary from a serial file.

    filename -- database file to load from.

    Raises UnsupportedTypeError when the file extension is not among the
    supported serialization formats.

    Returns a dictionary of database information.
    """

    # reject files we have no parser for
    path = pathlib.Path(filename)
    if path.suffix not in supported_formats:
        raise UnsupportedTypeError(path.name, supported_formats)

    # json
    if path.suffix == '.json':
        with open(filename, 'r') as json_file:
            j_dict = json.load(json_file)

    return j_dict

Diff for: src/diffpy/utils/tests/test_serialization.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from diffpy.utils.parsers import serialize_data, deserialize_data
2+
from diffpy.utils.parsers import loadData
3+
from diffpy.utils.tests.testhelpers import datafile
4+
5+
from diffpy.utils.parsers.custom_exceptions import UnsupportedTypeError, ImproperSizeError
6+
7+
import os
8+
import pytest
9+
import numpy
10+
11+
tests_dir = os.path.dirname(os.path.abspath(locals().get('__file__', 'file.py')))
12+
13+
targetjson = datafile('targetjson.json')
14+
schemaname = datafile('strumining.json')
15+
wrongtype = datafile('wrong.type')
16+
loadfile = datafile('loadfile.txt')
17+
nodt = datafile('loaddatawithheaders.txt')
18+
19+
20+
def test_load_multiple(tmp_path):
    """Serialize every file in testdata/dbload into one json and compare to a stored target.

    Files are processed in sorted order so the generated json accumulates
    entries deterministically; the returned dictionary and the file written
    incrementally on each pass must both match the target json.
    """
    # generate json and apply schema
    generatedjson = tmp_path / "generated_serialization.json"
    tlm_list = os.listdir(os.path.join(tests_dir, "testdata", "dbload"))
    tlm_list.sort()
    generated_data = None
    for hfname in tlm_list:
        # gather data using loadData
        headerfile = os.path.normpath(os.path.join(tests_dir, "testdata", "dbload", hfname))
        hdata = loadData(headerfile, headers=True)
        data_table = loadData(headerfile)

        # check path extraction: with show_path=True the entry carries an
        # absolute 'path' that must round-trip back to headerfile
        generated_data = serialize_data(headerfile, hdata, data_table, dt_colnames=['r', 'gr'], show_path=True)
        assert headerfile == os.path.normpath(generated_data[hfname].pop('path'))

        # rerun without path information and save to file
        # (serialize_data returns the whole accumulated database each pass)
        generated_data = serialize_data(headerfile, hdata, data_table, dt_colnames=['r', 'gr'],
                                        show_path=False, serial_file=generatedjson)

    # compare to target
    target_data = deserialize_data(targetjson)
    assert target_data == generated_data
    # ensure file saved properly
    assert target_data == deserialize_data(generatedjson)
45+
46+
47+
def test_exceptions():
    """Exercise the error paths and the dt_colnames variants of serialize_data."""
    header_info = loadData(loadfile, headers=True)
    table = loadData(loadfile)

    # unsupported file types are rejected on both the save and load sides
    with pytest.raises(UnsupportedTypeError):
        serialize_data(loadfile, header_info, table, serial_file=wrongtype)
    with pytest.raises(UnsupportedTypeError):
        deserialize_data(wrongtype)

    # more column names than data columns is an error
    with pytest.raises(ImproperSizeError):
        serialize_data(loadfile, header_info, table, dt_colnames=["one", "two", "three is too many"])

    # baseline output with both columns named
    normal = serialize_data(loadfile, header_info, table, dt_colnames=['r', 'gr'])
    data_name = list(normal.keys())[0]
    r_list = normal[data_name]['r']
    gr_list = normal[data_name]['gr']

    # three equivalent ways to denote no column names
    missing_parameter = serialize_data(loadfile, header_info, table, show_path=False)
    empty_parameter = serialize_data(loadfile, header_info, table, show_path=False, dt_colnames=[])
    none_entry_parameter = serialize_data(loadfile, header_info, table, show_path=False, dt_colnames=[None, None])
    # all three must produce identical entries
    assert missing_parameter == empty_parameter
    assert missing_parameter == none_entry_parameter
    print(table)
    print(missing_parameter[data_name]['data table prime'])
    assert numpy.allclose(missing_parameter[data_name]['data table prime'], table)

    # extracting a single column, by position
    r_extract = serialize_data(loadfile, header_info, table, show_path=False, dt_colnames=['r'])
    gr_extract = serialize_data(loadfile, header_info, table, show_path=False, dt_colnames=[None, 'gr'])
    incorrect_r_extract = serialize_data(loadfile, header_info, table, show_path=False, dt_colnames=[None, 'r'])
    # the second column is extracted in both cases, whatever it was named
    assert numpy.allclose(gr_extract[data_name]['gr'], incorrect_r_extract[data_name]['r'])
    assert 'r' not in gr_extract[data_name]
    assert 'gr' not in r_extract[data_name] and 'gr' not in incorrect_r_extract[data_name]
    # extracted values agree with the baseline columns
    assert numpy.allclose(r_extract[data_name]['r'], r_list)
    assert numpy.allclose(gr_extract[data_name]['gr'], gr_list)

    # a file with no named columns still serializes its data table
    nodt_hdata = loadData(nodt, headers=True)
    nodt_dt = loadData(nodt)
    no_dt = serialize_data(nodt, nodt_hdata, nodt_dt, show_path=False)
    nodt_data_name = list(no_dt.keys())[0]
    assert numpy.allclose(no_dt[nodt_data_name]['data table'], nodt_dt)

Diff for: src/diffpy/utils/tests/testdata/dbload/e1.gr

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
rmin = 0
2+
rmax = 10
3+
qmin = 0
4+
qmax = 10
5+
6+
0 0
7+
1 0
8+
2 0
9+
3 0

Diff for: src/diffpy/utils/tests/testdata/dbload/e2.gr

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
rmin = 1
2+
rmax = 11
3+
qmin = 1
4+
qmax = 11
5+
6+
0 1
7+
1 2
8+
2 3
9+
3 4

0 commit comments

Comments
 (0)