Skip to content

Commit 8bc011e

Browse files
committed
Clean up NMR utils and add tests for Bruker reader
1 parent dbf988f commit 8bc011e

File tree

2 files changed

+77
-11
lines changed

2 files changed

+77
-11
lines changed

Diff for: pydatalab/pydatalab/apps/nmr/utils.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import itertools
22
import os
33
import re
4+
from pathlib import Path
45

56
import matplotlib.pyplot as plt
67
import nmrglue as ng
@@ -13,15 +14,27 @@
1314
######################################################################################
1415

1516

16-
def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
17+
def read_bruker_1d(
18+
data: Path | pd.DataFrame,
19+
process_number: int = 1,
20+
verbose: bool = False,
21+
sample_mass_mg: float | None = None,
22+
) -> tuple[pd.DataFrame | None, dict, str | None, tuple[int, ...]]:
1723
"""Read a 1D bruker nmr spectrum and return it as a df.
1824
19-
arguments:
25+
Parameters:
26+
data: The directory of the full bruker data file, or a pandas DataFrame which
27+
will be returned without further processing.
28+
process_number: The process number of the processed data you want to plot [default: 1].
29+
verbose: Whether to print information such as the spectrum title to stdout.
30+
sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have a "intensity_per_scan_per_gram" column.
31+
32+
Returns:
33+
df: A pandas DataFrame containing the spectrum data, or None if the reading failed.
34+
a_dic: A dictionary containing the acquisition parameters.
35+
topspin_title: The title of the spectrum, as stored in the topspin "title" file.
36+
shape: The shape of the spectrum data array.
2037
21-
data: The directory of the full bruker data file. You may also supply a df as this argument. In this case, the df is returned as is.
22-
process_number: The process number of the processed data you want to plot [default 1]
23-
verbose: Whether to print information such as the spectrum title to stdout (default True)
24-
sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have a "intensity_per_scan_per_gram" column.
2538
"""
2639

2740
# if df is provided, just return it as-is. This functionality is provided to make functions calling read_bruker_1d flexible by default.
@@ -32,12 +45,12 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
3245
print("data frame provided to read_bruker_1d(). Returning it as is.")
3346
return data
3447
else:
35-
data_dir = data
48+
data_dir = Path(data)
3649

37-
processed_data_dir = os.path.join(data_dir, "pdata", str(process_number))
50+
processed_data_dir = data_dir / "pdata" / str(process_number)
3851

39-
a_dic, a_data = ng.fileio.bruker.read(data_dir) # aquisition_data
40-
p_dic, p_data = ng.fileio.bruker.read_pdata(processed_data_dir) # processing data
52+
a_dic, a_data = ng.fileio.bruker.read(str(data_dir)) # aquisition_data
53+
p_dic, p_data = ng.fileio.bruker.read_pdata(str(processed_data_dir)) # processing data
4154

4255
try:
4356
with open(os.path.join(processed_data_dir, "title"), "r") as f:
@@ -46,7 +59,6 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
4659
topspin_title = None
4760

4861
if len(p_data.shape) > 1:
49-
print("data is more than one dimensional - read failed")
5062
return None, a_dic, topspin_title, p_data.shape
5163

5264
nscans = a_dic["acqus"]["NS"]

Diff for: pydatalab/tests/apps/test_nmr.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import zipfile
2+
from pathlib import Path
3+
4+
import pytest
5+
6+
from pydatalab.apps.nmr.utils import read_bruker_1d
7+
8+
9+
def _extract_example(filename, dir):
10+
with zipfile.ZipFile(filename, "r") as zip_ref:
11+
zip_ref.extractall(dir)
12+
return Path(dir) / filename.stem
13+
14+
15+
@pytest.fixture(scope="function")
def nmr_1d_solution_example(tmpdir):
    """Extract the "1.zip" NMR example into the per-test temporary directory.

    Returns the path produced by ``_extract_example`` for that archive.
    """
    # Three levels above this test module is where ``example_data`` is looked up.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir / "example_data" / "NMR" / "1.zip"
    extracted = _extract_example(archive, tmpdir)
    return extracted
19+
20+
21+
@pytest.fixture(scope="function")
def nmr_1d_solid_example(tmpdir):
    """Extract the "71.zip" NMR example into the per-test temporary directory.

    Returns the path produced by ``_extract_example`` for that archive.
    """
    # Three levels above this test module is where ``example_data`` is looked up.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir / "example_data" / "NMR" / "71.zip"
    extracted = _extract_example(archive, tmpdir)
    return extracted
25+
26+
27+
@pytest.fixture(scope="function")
def nmr_2d_matpass_example(tmpdir):
    """Extract the "72.zip" NMR example into the per-test temporary directory.

    Returns the path produced by ``_extract_example`` for that archive.
    """
    # Three levels above this test module is where ``example_data`` is looked up.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir / "example_data" / "NMR" / "72.zip"
    extracted = _extract_example(archive, tmpdir)
    return extracted
31+
32+
33+
def test_bruker_reader_solution(nmr_1d_solution_example):
    """Check that the reader returns a DataFrame, metadata, a title and shape (4096,)."""
    result = read_bruker_1d(nmr_1d_solution_example)
    spectrum, acquisition, title, dims = result
    assert spectrum is not None
    assert acquisition
    assert title
    assert dims == (4096,)
39+
40+
41+
def test_bruker_reader_solid(nmr_1d_solid_example):
    """Check that the reader returns a DataFrame, metadata, a title and shape (9984,)."""
    result = read_bruker_1d(nmr_1d_solid_example)
    spectrum, acquisition, title, dims = result
    assert spectrum is not None
    assert acquisition
    assert title
    assert dims == (9984,)
47+
48+
49+
def test_bruker_reader_2D(nmr_2d_matpass_example):
    """Check that 2D input gives no DataFrame but still returns metadata, title and shape (8, 4096)."""
    result = read_bruker_1d(nmr_2d_matpass_example)
    spectrum, acquisition, title, dims = result
    # The 1D reader is expected to decline multi-dimensional data by returning None.
    assert spectrum is None
    assert acquisition
    assert title
    assert dims == (8, 4096)

0 commit comments

Comments
 (0)