serialization.py
#!/usr/bin/env python
##############################################################################
#
# diffpy.utils by DANSE Diffraction group
# Simon J. L. Billinge
# (c) 2010 The Trustees of Columbia University
# in the City of New York. All rights reserved.
#
# File coded by:
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE_DANSE.txt for license information.
#
##############################################################################

import json
import pathlib
import warnings

import numpy

from .custom_exceptions import ImproperSizeError, UnsupportedTypeError

# FIXME: add support for yaml, xml
supported_formats = [".json"]


def serialize_data(
filename,
hdata: dict,
data_table,
dt_colnames=None,
show_path=True,
serial_file=None,
):
"""Serialize file data into a dictionary. Can also save dictionary into a
serial language file. Dictionary is formatted as {filename: data}.
Requires hdata and data_table (can be generated by loadData).
Parameters
----------
filename
Name of the file whose data is being serialized.
hdata: dict
File metadata (generally related to data table).
data_table: list or ndarray
Data table.
dt_colnames: list
Names of each column in data_table. Every name in data_table_cols
will be put into the Dictionary as a key with a value of that column
in data_table (stored as a List). Put None for columns without names.
If dt_cols has less non-None entries than columns in data_table, the
pair {'data table': data_table} will be put in the dictionary.
(Default None: only entry {'data table': data_table} will be added to
dictionary.)
show_path: bool
include a path element in the database entry (default True). If
'path' is not included in hddata, extract path from filename.
serial_file
Serial language file to dump dictionary into. If None (default), no
dumping will occur.
Returns
-------
dict:
Returns the dictionary loaded from/into the updated database file.
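
    Examples
    --------
    A minimal sketch of building an in-memory entry without writing to a
    serial file; the file name and data values here are hypothetical::

        import numpy

        hdata = {"temperature": 300}
        data_table = numpy.array([[1.0, 0.51], [2.0, 0.73], [3.0, 0.94]])
        entry = serialize_data(
            "sample.gr",
            hdata,
            data_table,
            dt_colnames=["r", "gr"],
        )
        # entry == {"sample.gr": {"path": ..., "temperature": 300,
        #                         "r": [1.0, 2.0, 3.0], "gr": [0.51, 0.73, 0.94]}}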
"""
    # compile data_table and hdata together
    data = {}
    # handle getting the file name for a variety of filename types
    abs_path = pathlib.Path(filename).resolve()
# add path to start of data if requested
if show_path and "path" not in hdata.keys():
data.update({"path": abs_path.as_posix()})
# title the entry with name of file (taken from end of path)
title = abs_path.name
    # first add data in the hdata dict
    data.update(hdata)
    # second, add the named columns in dt_colnames; done second so that
    # data_table column entries take precedence over hdata entries
named_columns = 0 # initial value
max_columns = 1 # higher than named_columns to trigger 'data table' entry
if dt_colnames is not None:
num_columns = [len(row) for row in data_table]
max_columns = max(num_columns)
num_col_names = len(dt_colnames)
        # assume numpy.loadtxt gives a rectangular (non-ragged) array
        if max_columns < num_col_names:
            raise ImproperSizeError(
                "More entries in dt_colnames than columns in data_table."
            )
for idx in range(num_col_names):
colname = dt_colnames[idx]
if colname is not None:
if colname in hdata.keys():
warnings.warn(
(
f"Entry '{colname}' in hdata has been "
"overwritten by a data_table entry."
),
RuntimeWarning,
)
data.update({colname: list(data_table[:, idx])})
named_columns += 1
# finally add data_table as an entry named 'data table' if not all
# columns were parsed
if named_columns < max_columns:
if "data table" in data.keys():
warnings.warn(
(
"Entry 'data table' in hdata has been "
"overwritten by data_table."
),
RuntimeWarning,
)
data.update({"data table": data_table})
    # generate the dictionary entry keyed by the file name
entry = {title: data}
# no save
if serial_file is None:
return entry
# saving/updating file
# check if supported type
sf = pathlib.Path(serial_file)
sf_name = sf.name
extension = sf.suffix
if extension not in supported_formats:
raise UnsupportedTypeError(sf_name, supported_formats)
# new file or update
    existing = False
    try:
        # open and immediately close; this only checks that the file exists
        with open(serial_file):
            existing = True
    except FileNotFoundError:
        pass
# json
if extension == ".json":
        # json cannot serialize numpy arrays directly, so convert them to lists
        class NumpyEncoder(json.JSONEncoder):
            def default(self, data_obj):
                if isinstance(data_obj, numpy.ndarray):
                    return data_obj.tolist()
                return json.JSONEncoder.default(self, data_obj)
# dump if non-existing
if not existing:
with open(serial_file, "w") as jsonfile:
file_data = entry # for return
json.dump(file_data, jsonfile, indent=2, cls=NumpyEncoder)
# update if existing
else:
with open(serial_file, "r") as json_read:
file_data = json.load(json_read)
file_data.update(entry)
with open(serial_file, "w") as json_write:
# dump to string first for formatting
json.dump(file_data, json_write, indent=2, cls=NumpyEncoder)
    return file_data


def deserialize_data(filename, filetype=None):
"""Load a dictionary from a serial file.
Parameters
----------
filename
Serial file to load from.
filetype
For specifying extension type (i.e. '.json').
Returns
-------
dict
A dictionary read from a serial file.
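
    Examples
    --------
    A minimal sketch, assuming 'database.json' was previously written by
    serialize_data (the file name is hypothetical)::

        stored = deserialize_data("database.json")
        # stored maps each serialized file name to its data dictionary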
"""
# check if supported type
f = pathlib.Path(filename)
f_name = f.name
if filetype is None:
extension = f.suffix
if extension not in supported_formats:
raise UnsupportedTypeError(f_name, supported_formats)
else:
extension = filetype
return_dict = {}
# json
if extension == ".json":
with open(filename, "r") as json_file:
j_dict = json.load(json_file)
return_dict = j_dict
if len(return_dict) == 0:
warnings.warn(
"Loaded dictionary is empty. Possibly due to improper file type.",
RuntimeWarning,
)
return return_dict
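

# ---------------------------------------------------------------------------
# Illustrative round-trip sketch (not part of the module API).  The data and
# file names below are hypothetical; this block only runs when the module is
# executed as a script (e.g. via ``python -m``; the exact module path depends
# on the package layout).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import tempfile

    demo_hdata = {"temperature": 300, "wavelength": 0.24}
    demo_table = numpy.array([[1.0, 0.51], [2.0, 0.73], [3.0, 0.94]])
    with tempfile.TemporaryDirectory() as tmpdir:
        db_path = pathlib.Path(tmpdir) / "demo_database.json"
        # serialize one data set into a fresh json database
        serialize_data(
            "demo_measurement.gr",
            demo_hdata,
            demo_table,
            dt_colnames=["r", "gr"],
            serial_file=db_path.as_posix(),
        )
        # read the database back and print the stored entry
        print(deserialize_data(db_path.as_posix()))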