Skip to content

Commit bbb6151

Browse files
feat: move muD calculator from labpdfproc to here
1 parent 46bc3d1 commit bbb6151

File tree

2 files changed

+128
-19
lines changed

2 files changed

+128
-19
lines changed

Diff for: src/diffpy/utils/tools.py

+104-18
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,11 @@
33
from copy import copy
44
from pathlib import Path
55

6+
import numpy as np
7+
from scipy.optimize import dual_annealing
8+
from scipy.signal import convolve
69

7-
def clean_dict(obj):
8-
"""Remove keys from the dictionary where the corresponding value is None.
9-
10-
Parameters
11-
----------
12-
obj: dict
13-
The dictionary to clean. If None, initialize as an empty dictionary.
14-
15-
Returns
16-
-------
17-
dict:
18-
The cleaned dictionary with keys removed where the value is None.
19-
"""
20-
obj = obj if obj is not None else {}
21-
for key, value in copy(obj).items():
22-
if not value:
23-
del obj[key]
24-
return obj
10+
from diffpy.utils.parsers.loaddata import loadData
2511

2612

2713
def _stringify(obj):
@@ -206,3 +192,103 @@ def get_package_info(package_names, metadata=None):
206192
pkg_info.update({package: importlib.metadata.version(package)})
207193
metadata["package_info"] = pkg_info
208194
return metadata
195+
196+
197+
def _top_hat(z, half_slit_width):
198+
"""Create a top-hat function, return 1.0 for values within the specified
199+
slit width and 0 otherwise."""
200+
return np.where((z >= -half_slit_width) & (z <= half_slit_width), 1.0, 0.0)
201+
202+
203+
def _model_function(z, diameter, z0, I0, mud, slope):
204+
"""
205+
Compute the model function with the following steps:
206+
1. Let dz = z-z0, so that dz is centered at 0
207+
2. Compute length l that is the effective length for computing intensity I = I0 * e^{-mu * l}:
208+
- For dz within the capillary diameter, l is the chord length of the circle at position dz
209+
- For dz outside this range, l = 0
210+
3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * z
211+
"""
212+
min_radius = -diameter / 2
213+
max_radius = diameter / 2
214+
dz = z - z0
215+
length = np.piecewise(
216+
dz,
217+
[dz < min_radius, (min_radius <= dz) & (dz <= max_radius), dz > max_radius],
218+
[0, lambda dz: 2 * np.sqrt((diameter / 2) ** 2 - dz**2), 0],
219+
)
220+
return (I0 - slope * z) * np.exp(-mud / diameter * length)
221+
222+
223+
def _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope):
224+
"""Extend z values and I values for padding (so that we don't have tails in
225+
convolution), then perform convolution (note that the convolved I values
226+
are the same as modeled I values if slit width is close to 0)"""
227+
n_points = len(z)
228+
z_left_pad = np.linspace(z.min() - n_points * (z[1] - z[0]), z.min(), n_points)
229+
z_right_pad = np.linspace(z.max(), z.max() + n_points * (z[1] - z[0]), n_points)
230+
z_extended = np.concatenate([z_left_pad, z, z_right_pad])
231+
I_extended = _model_function(z_extended, diameter, z0, I0, mud, slope)
232+
kernel = _top_hat(z_extended - z_extended.mean(), half_slit_width)
233+
I_convolved = I_extended # this takes care of the case where slit width is close to 0
234+
if kernel.sum() != 0:
235+
kernel /= kernel.sum()
236+
I_convolved = convolve(I_extended, kernel, mode="same")
237+
padding_length = len(z_left_pad)
238+
return I_convolved[padding_length:-padding_length]
239+
240+
241+
def _objective_function(params, z, observed_data):
242+
"""Compute the objective function for fitting a model to the
243+
observed/experimental data by minimizing the sum of squared residuals
244+
between the observed data and the convolved model data."""
245+
diameter, half_slit_width, z0, I0, mud, slope = params
246+
convolved_model_data = _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope)
247+
residuals = observed_data - convolved_model_data
248+
return np.sum(residuals**2)
249+
250+
251+
def _compute_single_mud(z_data, I_data):
252+
"""Perform dual annealing optimization and extract the parameters."""
253+
bounds = [
254+
(1e-5, z_data.max() - z_data.min()), # diameter: [small positive value, upper bound]
255+
(0, (z_data.max() - z_data.min()) / 2), # half slit width: [0, upper bound]
256+
(z_data.min(), z_data.max()), # z0: [min z, max z]
257+
(1e-5, I_data.max()), # I0: [small positive value, max observed intensity]
258+
(1e-5, 20), # muD: [small positive value, upper bound]
259+
(-100000, 100000), # slope: [lower bound, upper bound]
260+
]
261+
result = dual_annealing(_objective_function, bounds, args=(z_data, I_data))
262+
diameter, half_slit_width, z0, I0, mud, slope = result.x
263+
convolved_fitted_signal = _extend_z_and_convolve(z_data, diameter, half_slit_width, z0, I0, mud, slope)
264+
residuals = I_data - convolved_fitted_signal
265+
rmse = np.sqrt(np.mean(residuals**2))
266+
return mud, rmse
267+
268+
269+
def compute_mud(filepath):
270+
"""Compute the best-fit mu*D value from a z-scan file, removing the sample
271+
holder effect.
272+
273+
This function loads z-scan data and fits it to a model
274+
that convolves a top-hat function with I = I0 * e^{-mu * l}.
275+
The fitting procedure is run multiple times, and we return the best-fit parameters based on the lowest rmse.
276+
277+
The full mathematical details are described in the paper:
278+
An ad hoc Absorption Correction for Reliable Pair-Distribution Functions from Low Energy x-ray Sources,
279+
Yucong Chen, Till Schertenleib, Andrew Yang, Pascal Schouwink, Wendy L. Queen and Simon J. L. Billinge,
280+
in preparation.
281+
282+
Parameters
283+
----------
284+
filepath : str
285+
The path to the z-scan file.
286+
287+
Returns
288+
-------
289+
mu*D : float
290+
The best-fit mu*D value.
291+
"""
292+
z_data, I_data = loadData(filepath, unpack=True)
293+
best_mud, _ = min((_compute_single_mud(z_data, I_data) for _ in range(20)), key=lambda pair: pair[1])
294+
return best_mud

Diff for: tests/test_tools.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,16 @@
33
import os
44
from pathlib import Path
55

6+
import numpy as np
67
import pytest
78

8-
from diffpy.utils.tools import check_and_build_global_config, get_package_info, get_user_info
9+
from diffpy.utils.tools import (
10+
_extend_z_and_convolve,
11+
check_and_build_global_config,
12+
compute_mud,
13+
get_package_info,
14+
get_user_info,
15+
)
916

1017

1118
@pytest.mark.parametrize(
@@ -160,3 +167,19 @@ def test_get_package_info(monkeypatch, inputs, expected):
160167
)
161168
actual_metadata = get_package_info(inputs[0], metadata=inputs[1])
162169
assert actual_metadata == expected
170+
171+
172+
def test_compute_mud(tmp_path):
173+
diameter, slit_width, z0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0
174+
z_data = np.linspace(-1, 1, 50)
175+
convolved_I_data = _extend_z_and_convolve(z_data, diameter, slit_width, z0, I0, mud, slope)
176+
177+
directory = Path(tmp_path)
178+
file = directory / "testfile"
179+
with open(file, "w") as f:
180+
for x, I in zip(z_data, convolved_I_data):
181+
f.write(f"{x}\t{I}\n")
182+
183+
expected_mud = 3
184+
actual_mud = compute_mud(file)
185+
assert actual_mud == pytest.approx(expected_mud, rel=1e-4, abs=1e-3)

0 commit comments

Comments
 (0)