Skip to content

Commit e0eee6a

Browse files
committed
Convert HDF5/Nexus file to zip-file with most metadata
1 parent a3eec1b commit e0eee6a

File tree

2 files changed

+183
-0
lines changed

2 files changed

+183
-0
lines changed

src/freesas/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ py.install_sources([
2121
'sasio.py',
2222
'transformations.py',
2323
'dnn.py',
24+
'nexus_parser.py',
2425
],
2526
pure: false, # Will be installed next to binaries
2627
subdir: 'freesas' # Folder relative to site-packages to install to

src/freesas/nexus_parser.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
__author__ = "Jérôme Kieffer"
2+
__license__ = "MIT"
3+
__copyright__ = "2017, ESRF"
4+
__date__ = "20/01/2025"
5+
6+
import sys, os
7+
import zipfile
8+
import posixpath
9+
import logging
10+
from typing import Union
11+
from silx.io.nxdata import NXdata
12+
from dataclasses import dataclass
13+
import numpy
14+
logger = logging.getLogger(__name__)
15+
16+
try:
17+
import h5py
18+
except ImportError:
19+
logger.error("H5py is mandatory to parse HDF5 files")
20+
h5py = None
21+
22+
23+
@dataclass
class IntegratedPattern:
    """Store one pyFAI integrated pattern.

    ``repr()`` of an instance is a tab-separated text table: one header line
    starting with ``#`` followed by one line per radial bin, with two columns
    (radial, intensity) or three when uncertainties are available.
    """

    # Value associated with this pattern along the first axis, None if unknown
    point: Union[float, int, None]
    # Radial positions, one per bin
    radial: numpy.ndarray
    # Intensities, iterated in lockstep with `radial`
    intensity: numpy.ndarray
    # Uncertainties on the intensities, None when not available
    intensity_errors: Union[numpy.ndarray, None] = None
    # Names and units used to build the header line
    radial_name: str = ""
    radial_units: str = ""
    intensity_name: str = ""
    intensity_units: str = ""

    def __repr__(self):
        """Return the pattern formatted as a 2- or 3-column text table."""
        header = f"# {self.radial_name}({self.radial_units}) \t {self.intensity_name}({self.intensity_units})"
        if self.intensity_errors is None:
            # Only two columns here: unpacking a third value would raise ValueError
            rows = [f"{q} \t {i}" for q, i in zip(self.radial, self.intensity)]
        else:
            header += " \t uncertainties"
            rows = [
                f"{q} \t {i} \t {s}"
                for q, i, s in zip(self.radial, self.intensity, self.intensity_errors)
            ]
        return os.linesep.join([header, *rows])
48+
49+
50+
def read_nexus_integrated_patterns(group):
    """Read integrated patterns from a HDF5 NXdata group.

    It reads from both single (1D signal) or multi (2D signal) NXdata.

    :param group: h5py.Group
    :return: list of IntegratedPattern instances, one per row of the signal.
    :raises RuntimeError: if the group is not a valid NXdata, if the signal is
        neither 1D nor 2D, or if the axes do not match the signal shape.
    """
    nxdata = NXdata(group)
    if not nxdata.is_valid:
        raise RuntimeError(
            f"Cannot parse NXdata group: {group.file.filename}::{group.name}"
        )
    if not (nxdata.signal_is_1d or nxdata.signal_is_2d):
        raise RuntimeError(
            f"Signal is not a 1D or 2D dataset: {group.file.filename}::{group.name}"
        )

    signal = nxdata.signal
    axes = nxdata.axes

    # One "point" per pattern: a single None for a 1D signal, else the first axis
    if nxdata.signal_is_1d:
        points = [None]
    elif axes[0] is None:
        points = [None] * signal.shape[0]
    else:
        points = axes[0][()]

    if axes[-1] is None:
        # The radial dimension is the last one: index 0 for a 1D signal,
        # index 1 for a 2D one. shape[1] would fail on a 1D signal.
        radial = numpy.arange(signal.shape[-1])
        radial_units = ""
        radial_name = ""
    else:
        axis_dataset = axes[-1]
        radial = axis_dataset[()]
        radial_name = axis_dataset.name.split("/")[-1]
        radial_units = axis_dataset.attrs.get("units", "")

    # Promote a 1D signal to a single-row 2D array so both cases share one path
    intensities = numpy.atleast_2d(signal)
    intensity_name = signal.name.split("/")[-1]
    intensity_units = signal.attrs.get("units", "")

    if nxdata.errors is None:
        errors = [None] * intensities.shape[0]
    else:
        errors = numpy.atleast_2d(nxdata.errors)

    if (len(points), len(radial)) != intensities.shape:
        raise RuntimeError("Shape mismatch between axes and signal")

    return [
        IntegratedPattern(
            point,
            radial,
            intensity,
            intensity_errors,
            radial_name,
            radial_units,
            intensity_name,
            intensity_units,
        )
        for point, intensity, intensity_errors in zip(points, intensities, errors)
    ]
100+
101+
102+
class Tree:
    """Nested-dict mirror of a HDF5 file which can be saved as a zip archive.

    Groups become dictionaries, NXdata groups containing an "errors" entry
    become lists of integrated patterns and scalar/1D datasets become values.
    """

    def __init__(self, root=None):
        """
        :param root: dictionary used as the root of the tree, a new one is
            created when None.
        """
        # `root or {}` would silently drop an empty dict provided by the caller
        self.root = {} if root is None else root
        # HDF5 paths which must be ignored by `visit_item`
        self.skip = set()

    def visit_item(self, name, obj):
        """Callback for `h5py.Group.visititems`: store `obj` in the tree.

        :param name: path of the object, relative to the visited group
        :param obj: h5py.Group or h5py.Dataset
        """
        if name in self.skip:
            return
        node = self.root
        # Spaces are replaced by underscores in every component of the path
        path = [i.replace(" ", "_") for i in name.split("/")]
        for i in path[:-1]:
            if i not in node:
                node[i] = {}
            node = node[i]
        if isinstance(obj, h5py.Group):
            if obj.attrs.get("NX_class") == "NXdata" and "errors" in obj:
                try:
                    node[path[-1]] = read_nexus_integrated_patterns(obj)
                except (KeyError, OSError, RuntimeError) as err:
                    # Best effort: a group which cannot be parsed (RuntimeError is
                    # what the parser raises for invalid NXdata) is reported and
                    # left out instead of aborting the whole visit.
                    print(f"{type(err).__name__}: {err} while reading {path}")
                # The content of the NXdata group (children and grand-children)
                # must not be visited again as individual entries.
                for key in obj:
                    self.skip.add(posixpath.join(name, key))
                    if isinstance(obj[key], h5py.Group):
                        for sub in obj[key]:
                            self.skip.add(posixpath.join(name, key, sub))
            else:
                node[path[-1]] = {}
        if isinstance(obj, h5py.Dataset):
            # Only scalar and 1D datasets are kept
            if len(obj.shape) <= 1:
                node[path[-1]] = obj[()]

    def _write(self, z, path, name, obj):
        """Recursively write `obj` into the open ZipFile `z` as `path`/`name`."""
        new_path = posixpath.join(path, name)
        if isinstance(obj, dict):
            # ZipFile.mkdir only exists since Python 3.11
            if sys.version_info >= (3, 11):
                z.mkdir(new_path)
            for key, value in obj.items():
                self._write(z, new_path, key, value)
        elif isinstance(obj, numpy.ndarray):
            if obj.ndim == 1:
                z.writestr(new_path, os.linesep.join(str(i) for i in obj))
            else:
                z.writestr(new_path, str(obj))
        elif isinstance(obj, list):
            if sys.version_info >= (3, 11):
                z.mkdir(new_path)
            if len(obj) == 1:
                z.writestr(new_path + "/biosaxs.dat", str(obj[0]))
            else:
                for i, j in enumerate(obj):
                    # Same "biosaxs" prefix as the single-pattern file
                    z.writestr(new_path + f"/biosaxs_{i:04d}.dat", str(j))
        # numpy.bool_ exists in every NumPy release, unlike the numpy.bool alias
        # which is missing from NumPy 1.24 to 1.26.
        elif isinstance(obj, (int, float, numpy.number, bool, numpy.bool_)):
            z.writestr(new_path, str(obj))
        elif isinstance(obj, (str, bytes)):
            z.writestr(new_path, obj)
        else:
            print(f"skip {new_path} for {obj} of type {obj.__class__.__mro__}")

    def save(self, filename):
        """Write the tree into a new zip archive.

        :param filename: path or file-like object given to `zipfile.ZipFile`
        """
        with zipfile.ZipFile(filename, "w") as z:
            for key, value in self.root.items():
                self._write(z, "", key, value)

    def get(self, path):
        """Return the node stored at the "/"-separated `path`.

        :raises KeyError: if a component of the path is missing from a dict node
        """
        node = self.root
        for i in path.split("/"):
            node = node[i]
        return node
169+
170+
def convert_nexus2zip(nexusfile, outfile=None):
    """Convert a nexus-file, as produced by BM29 beamline, into a zip file.

    :param nexusfile: string with the path of the input file
    :param outfile: name of the output file. When not provided, the extension
        of `nexusfile` is replaced with ".zip"
    :return: None
    :raises RuntimeError: if h5py could not be imported
    """
    if h5py is None:
        # The module-level import is allowed to fail: report it clearly here
        raise RuntimeError("H5py is mandatory to parse HDF5 files")
    tree = Tree()
    with h5py.File(nexusfile, "r") as h:
        h.visititems(tree.visit_item)
    if not outfile:
        # ".zip" as documented: a ".h5" default would overwrite a ".h5" input
        outfile = os.path.splitext(nexusfile)[0] + ".zip"
    tree.save(outfile)
182+

0 commit comments

Comments
 (0)