forked from diffpy/diffpy.utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserialization.py
209 lines (169 loc) · 6.41 KB
/
serialization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python
##############################################################################
#
# diffpy.utils by DANSE Diffraction group
# Simon J. L. Billinge
# (c) 2010 The Trustees of Columbia University
# in the City of New York. All rights reserved.
#
# File coded by:
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE_DANSE.txt for license information.
#
##############################################################################
import json
import warnings
from pathlib import Path
import numpy
from .custom_exceptions import ImproperSizeError, UnsupportedTypeError
# FIXME: add support for yaml, xml
# File extensions (including the leading dot, as returned by Path.suffix) accepted for serial files.
# serialize_data checks the serial_file extension against this list, and deserialize_data checks the
# filename extension against it when no explicit filetype is given.
supported_formats = [".json"]
class _NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy arrays and numpy scalars into built-in Python objects."""

    def default(self, data_obj):
        if isinstance(data_obj, numpy.ndarray):
            return data_obj.tolist()
        if isinstance(data_obj, numpy.generic):
            # Columns taken from an ndarray are lists of numpy scalars (e.g. numpy.int64),
            # which the stock encoder cannot serialize.
            return data_obj.item()
        return super().default(data_obj)


def serialize_data(
    filepath,
    hdata: dict,
    data_table,
    dt_col_names=None,
    show_path=True,
    serial_file=None,
):
    """Serialize file data into a dictionary. Can also save dictionary into a serial language file. Dictionary is
    formatted as {filename: data}.

    Requires hdata and data_table (can be generated by loadData).

    Parameters
    ----------
    filepath
        The file path whose data is being serialized.
    hdata: dict
        The file metadata (generally related to data table).
    data_table: list or ndarray
        The data table. Either a two-dimensional ndarray or a list of rows.
    dt_col_names: list
        Names of each column in data_table. Every name in dt_col_names will be put into the dictionary
        as a key with a value of that column in data_table (stored as a list). Put None for columns
        without names. If dt_col_names has fewer non-None entries than columns in data_table,
        the pair {'data table': data_table} will be put in the dictionary.
        (Default None: only entry {'data table': data_table} will be added to dictionary.)
    show_path: bool
        Include a path element in the database entry (default True). If 'path' is not included in hdata,
        extract path from filepath.
    serial_file
        Serial language file to dump dictionary into. If None (default), no dumping will occur.

    Returns
    -------
    dict:
        The entry {filename: data} when serial_file is None; otherwise the full dictionary
        that was written to serial_file (existing content updated with the new entry).

    Raises
    ------
    ImproperSizeError
        If dt_col_names has more entries than data_table has columns, or a named column is
        missing from a row of a list-based data_table.
    UnsupportedTypeError
        If the extension of serial_file is not in supported_formats.
    """
    # Combine data_table and hdata together in data
    data = {}

    # Resolve once; used both for the optional 'path' entry and for the dictionary key
    abs_path = Path(filepath).resolve()

    # Add path to start of data if show_path is True and hdata does not provide its own
    if show_path and "path" not in hdata:
        data["path"] = abs_path.as_posix()

    # Add hdata to data
    data.update(hdata)

    # Widest row determines how many columns the table has
    dt_max_col_count = max(len(row) for row in data_table)

    # Column entries take priority over hdata entries with the same name
    col_counter = 0
    if dt_col_names is not None:
        if dt_max_col_count < len(dt_col_names):  # assume numpy.loadtxt gives non-irregular array
            raise ImproperSizeError("More entries in dt_col_names_count than columns in data_table.")
        for idx, col_name in enumerate(dt_col_names):
            if col_name is None:
                continue
            if col_name in hdata:
                warnings.warn(
                    f"Entry '{col_name}' in hdata has been overwritten by a data_table entry.",
                    RuntimeWarning,
                )
            if isinstance(data_table, numpy.ndarray):
                column = list(data_table[:, idx])
            else:
                # Plain sequences of rows do not support tuple indexing; walk the rows instead.
                try:
                    column = [row[idx] for row in data_table]
                except IndexError as err:
                    raise ImproperSizeError(
                        f"Column '{col_name}' (index {idx}) is missing from at least one row of data_table."
                    ) from err
            data[col_name] = column
            col_counter += 1

    # Add data_table as an entry named 'data table' if not all columns were parsed
    if col_counter < dt_max_col_count:
        if "data table" in data:
            warnings.warn(
                "Entry 'data table' in hdata has been overwritten by data_table.",
                RuntimeWarning,
            )
        data["data table"] = data_table

    # Dictionary key is the bare file name
    entry = {abs_path.name: data}
    if serial_file is None:
        return entry

    # saving/updating file
    # check if supported type
    sf_path = Path(serial_file)
    sf_ext = sf_path.suffix
    if sf_ext not in supported_formats:
        raise UnsupportedTypeError(sf_path.name, supported_formats)

    # json
    if sf_ext == ".json":
        # Read existing content if the file is already there; otherwise start a new database.
        try:
            with open(serial_file, "r") as json_read:
                file_data = json.load(json_read)
        except FileNotFoundError:
            file_data = entry
        else:
            file_data.update(entry)

        # Encode before opening for writing so an encoding failure cannot truncate an existing file.
        payload = json.dumps(file_data, indent=2, cls=_NumpyEncoder)
        with open(serial_file, "w") as json_write:
            json_write.write(payload)

    return file_data
def deserialize_data(filename, filetype=None):
    """Load a dictionary from a serial file.

    Parameters
    ----------
    filename
        Serial file to load from.
    filetype
        For specifying extension type (i.e. '.json'). When given, it is used as-is instead of
        the extension of filename.

    Returns
    -------
    dict
        A dictionary read from a serial file. Empty if the extension in use has no loader.

    Raises
    ------
    UnsupportedTypeError
        If filetype is None and the extension of filename is not in supported_formats.
    """
    source = Path(filename)

    # An explicit filetype wins; only the extension inferred from the name is validated.
    if filetype is not None:
        extension = filetype
    else:
        extension = source.suffix
        if extension not in supported_formats:
            raise UnsupportedTypeError(source.name, supported_formats)

    loaded = {}

    # json
    if extension == ".json":
        with open(filename, "r") as handle:
            loaded = json.load(handle)

    # Nothing was read: either the file held an empty container or no loader matched the extension.
    if len(loaded) == 0:
        warnings.warn(
            "Loaded dictionary is empty. Possibly due to improper file type.",
            RuntimeWarning,
        )

    return loaded