Skip to content

Commit dbf988f

Browse files
committed
Refactor NMR block into its own app module and restructure blocks into common and base
1 parent c47a141 commit dbf988f

File tree

13 files changed

+172
-155
lines changed

13 files changed

+172
-155
lines changed

pydatalab/pydatalab/apps/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# This import is required to prevent circular imports for application-specific blocks
2-
from pydatalab.blocks.blocks import DataBlock # noqa
2+
from pydatalab.blocks.base import DataBlock # noqa

pydatalab/pydatalab/apps/chat/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import openai
66
import tiktoken
77

8-
from pydatalab.blocks.blocks import DataBlock
8+
from pydatalab.blocks.base import DataBlock
99
from pydatalab.logger import LOGGER
1010
from pydatalab.models import ITEM_MODELS
1111
from pydatalab.utils import CustomJSONEncoder

pydatalab/pydatalab/apps/echem/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from navani import echem as ec
1010

1111
from pydatalab import bokeh_plots
12-
from pydatalab.blocks.blocks import DataBlock
12+
from pydatalab.blocks.base import DataBlock
1313
from pydatalab.file_utils import get_file_info_by_id
1414
from pydatalab.logger import LOGGER
1515
from pydatalab.mongo import flask_mongo

pydatalab/pydatalab/apps/eis/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pandas as pd
66
from bokeh.models import HoverTool, LogColorMapper
77

8-
from pydatalab.blocks.blocks import DataBlock
8+
from pydatalab.blocks.base import DataBlock
99
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
1010
from pydatalab.file_utils import get_file_info_by_id
1111
from pydatalab.logger import LOGGER
pydatalab/pydatalab/apps/nmr/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .blocks import NMRBlock
2+
3+
__all__ = ("NMRBlock",)
pydatalab/pydatalab/apps/nmr/blocks.py

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import os
2+
import zipfile
3+
4+
import bokeh.embed
5+
import pandas as pd
6+
7+
from pydatalab.blocks.base import DataBlock
8+
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
9+
from pydatalab.file_utils import get_file_info_by_id
10+
from pydatalab.logger import LOGGER
11+
12+
from .utils import read_bruker_1d
13+
14+
15+
class NMRBlock(DataBlock):
    """Data block that reads a zipped Bruker NMR project and plots the processed 1D data.

    The block expects ``self.data["file_id"]`` to point at a ``.zip`` file. The archive
    is extracted next to the stored file (``<location>.extracted``) and the folder inside
    it that shares the zip's base name is handed to ``read_bruker_1d``.
    """

    blocktype = "nmr"
    description = "Simple NMR Block"
    # Must be a tuple rather than a bare string: with a string, the `in` test below is a
    # substring match, so "", ".z" or ".zi" would all be accepted as valid extensions.
    accepted_file_extensions = (".zip",)
    defaults = {"process number": 1}
    _supports_collections = False

    @property
    def plot_functions(self):
        """Return the callables that generate this block's plots."""
        return (self.generate_nmr_plot,)

    def read_bruker_nmr_data(self):
        """Extract the attached zip and load the selected process into ``self.data``.

        On success this sets ``processed_data`` (the dataframe serialised with
        ``to_dict()``, or ``None``), the raw ``acqus``/``procs``/``pprog`` dictionaries,
        ``available_processes``, and a handful of acquisition values pulled out for the UI.

        The method logs and returns ``None`` without loading anything when no file is
        attached, the file is not a ``.zip``, the extracted archive has no usable
        ``pdata`` folder, or ``read_bruker_1d`` raises.
        """
        if "file_id" not in self.data:
            LOGGER.warning("NMRBlock.read_bruker_nmr_data(): No file set in the DataBlock")
            return

        zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
        filename = zip_file_info["name"]

        name, ext = os.path.splitext(filename)
        if ext.lower() not in self.accepted_file_extensions:
            LOGGER.warning(
                "NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)"
            )
            return

        # unzip:
        # NOTE(review): the archive is user-supplied; zipfile sanitises member paths, but
        # confirm no further inspection (size limits, etc.) is wanted before extraction.
        directory_location = zip_file_info["location"] + ".extracted"
        LOGGER.debug(f"Directory location is: {directory_location}")
        with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref:
            zip_ref.extractall(directory_location)

        # The project folder inside the archive is assumed to share the zip's base name.
        extracted_directory_name = os.path.join(directory_location, name)
        pdata_directory = os.path.join(extracted_directory_name, "pdata")
        try:
            # Sorted so that the default selection below does not depend on the
            # arbitrary ordering returned by os.listdir.
            available_processes = sorted(os.listdir(pdata_directory))
        except OSError as error:
            LOGGER.warning(
                f"NMRBlock.read_bruker_nmr_data(): Could not list {pdata_directory}: {error}"
            )
            return

        if not available_processes:
            LOGGER.warning(
                f"NMRBlock.read_bruker_nmr_data(): No processes found in {pdata_directory}"
            )
            return

        # Fall back to the first available process if nothing valid is selected yet.
        if self.data.get("selected_process") not in available_processes:
            self.data["selected_process"] = available_processes[0]

        try:
            df, a_dic, topspin_title, processed_data_shape = read_bruker_1d(
                extracted_directory_name,
                process_number=self.data["selected_process"],
                verbose=False,
            )
        except Exception as error:
            LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}")
            return

        serialized_df = df.to_dict() if (df is not None) else None

        # all data sorted in a fairly raw way
        self.data["processed_data"] = serialized_df
        self.data["acquisition_parameters"] = a_dic["acqus"]
        self.data["processing_parameters"] = a_dic["procs"]
        self.data["pulse_program"] = a_dic["pprog"]

        # specific things that we might want to pull out for the UI:
        self.data["available_processes"] = available_processes
        self.data["nucleus"] = a_dic["acqus"]["NUC1"]
        self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"]
        self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"]
        self.data["recycle_delay"] = a_dic["acqus"]["D"][1]
        self.data["nscans"] = a_dic["acqus"]["NS"]
        self.data["CNST31"] = a_dic["acqus"]["CNST"][31]
        self.data["processed_data_shape"] = processed_data_shape

        self.data["probe_name"] = a_dic["acqus"]["PROBHD"]
        self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"]
        self.data["topspin_title"] = topspin_title

    def generate_nmr_plot(self):
        """Build the Bokeh plot for the processed data and store it in ``self.data``.

        Sets ``self.data["bokeh_plot_data"]`` to the JSON item for the plot, or to
        ``None`` when there is no processed data to plot.
        """
        # currently calls every time plotting happens, but it should only happen if the
        # file was updated
        self.read_bruker_nmr_data()
        if not self.data.get("processed_data"):
            self.data["bokeh_plot_data"] = None
            return

        df = pd.DataFrame(self.data["processed_data"])
        df["normalized intensity"] = df.intensity / df.intensity.max()

        bokeh_layout = selectable_axes_plot(
            df,
            x_options=["ppm", "hz"],
            y_options=[
                "intensity",
                "intensity_per_scan",
                "normalized intensity",
            ],
            plot_line=True,
            point_size=3,
        )
        bokeh_layout.children[0].x_range.flipped = True  # flip x axis, per NMR convention

        self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme)

pydatalab/pydatalab/apps/raman/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pybaselines import Baseline
88
from scipy.signal import medfilt
99

10-
from pydatalab.blocks.blocks import DataBlock
10+
from pydatalab.blocks.base import DataBlock
1111
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
1212
from pydatalab.file_utils import get_file_info_by_id
1313

pydatalab/pydatalab/apps/tga/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from scipy.signal import savgol_filter
88

99
from pydatalab.apps.tga.parsers import parse_mt_mass_spec_ascii
10-
from pydatalab.blocks.blocks import DataBlock
10+
from pydatalab.blocks.base import DataBlock
1111
from pydatalab.bokeh_plots import grid_theme, selectable_axes_plot
1212
from pydatalab.file_utils import get_file_info_by_id
1313
from pydatalab.logger import LOGGER

pydatalab/pydatalab/apps/xrd/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas as pd
77
from scipy.signal import medfilt
88

9-
from pydatalab.blocks.blocks import DataBlock
9+
from pydatalab.blocks.base import DataBlock
1010
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
1111
from pydatalab.file_utils import get_file_info_by_id
1212
from pydatalab.logger import LOGGER

pydatalab/pydatalab/blocks/__init__.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
from typing import Dict, Sequence, Type
22

3+
# These app imports will be replaced by dynamic plugins in a future version
34
from pydatalab.apps.chat.blocks import ChatBlock
45
from pydatalab.apps.echem import CycleBlock
56
from pydatalab.apps.eis import EISBlock
7+
from pydatalab.apps.nmr import NMRBlock
68
from pydatalab.apps.raman import RamanBlock
79
from pydatalab.apps.tga import MassSpecBlock
810
from pydatalab.apps.xrd import XRDBlock
9-
from pydatalab.blocks.blocks import (
10-
CommentBlock,
11+
from pydatalab.blocks.base import (
1112
DataBlock,
13+
)
14+
from pydatalab.blocks.common import (
15+
CommentBlock,
1216
MediaBlock,
13-
NMRBlock,
1417
NotSupportedBlock,
1518
)
1619

pydatalab/pydatalab/blocks/blocks.py renamed to pydatalab/pydatalab/blocks/base.py

-145
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
1-
import base64
2-
import io
3-
import os
41
import random
5-
import zipfile
62
from typing import Any, Callable, Dict, Optional, Sequence
73

8-
import bokeh.embed
9-
import pandas as pd
104
from bson import ObjectId
11-
from PIL import Image
125

13-
from pydatalab import nmr_utils
14-
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
15-
from pydatalab.file_utils import get_file_info_by_id
166
from pydatalab.logger import LOGGER
177

188
__all__ = ("generate_random_id", "DataBlock")
@@ -170,138 +160,3 @@ def update_from_web(self, data):
170160
self.data.update(data)
171161

172162
return self
173-
174-
175-
class NotSupportedBlock(DataBlock):
176-
blocktype = "notsupported"
177-
description = "Block not supported"
178-
_supports_collections = True
179-
180-
181-
class CommentBlock(DataBlock):
182-
blocktype = "comment"
183-
description = "Comment"
184-
_supports_collections = True
185-
186-
187-
class MediaBlock(DataBlock):
188-
blocktype = "media"
189-
description = "Media"
190-
accepted_file_extensions = (".png", ".jpeg", ".jpg", ".tif", ".tiff", ".mp4", ".mov", ".webm")
191-
_supports_collections = False
192-
193-
@property
194-
def plot_functions(self):
195-
return (self.encode_tiff,)
196-
197-
def encode_tiff(self):
198-
if "file_id" not in self.data:
199-
LOGGER.warning("ImageBlock.encode_tiff(): No file set in the DataBlock")
200-
return
201-
if "b64_encoded_image" not in self.data:
202-
self.data["b64_encoded_image"] = {}
203-
file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
204-
if file_info["name"].endswith(".tif") or file_info["name"].endswith(".tiff"):
205-
im = Image.open(file_info["location"])
206-
LOGGER.warning("Making base64 encoding of tif")
207-
with io.BytesIO() as f:
208-
im.save(f, format="PNG")
209-
f.seek(0)
210-
self.data["b64_encoded_image"][self.data["file_id"]] = base64.b64encode(
211-
f.getvalue()
212-
).decode()
213-
214-
215-
class NMRBlock(DataBlock):
216-
blocktype = "nmr"
217-
description = "Simple NMR Block"
218-
accepted_file_extensions = ".zip"
219-
defaults = {"process number": 1}
220-
_supports_collections = False
221-
222-
@property
223-
def plot_functions(self):
224-
return (self.generate_nmr_plot,)
225-
226-
def read_bruker_nmr_data(self):
227-
if "file_id" not in self.data:
228-
LOGGER.warning("NMRPlot.read_bruker_nmr_data(): No file set in the DataBlock")
229-
return
230-
231-
zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
232-
filename = zip_file_info["name"]
233-
234-
name, ext = os.path.splitext(filename)
235-
if ext.lower() not in self.accepted_file_extensions:
236-
LOGGER.warning(
237-
"NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)"
238-
)
239-
return
240-
241-
# unzip:
242-
directory_location = zip_file_info["location"] + ".extracted"
243-
LOGGER.debug(f"Directory location is: {directory_location}")
244-
with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref:
245-
zip_ref.extractall(directory_location)
246-
247-
extracted_directory_name = os.path.join(directory_location, name)
248-
available_processes = os.listdir(os.path.join(extracted_directory_name, "pdata"))
249-
250-
if self.data.get("selected_process") not in available_processes:
251-
self.data["selected_process"] = available_processes[0]
252-
253-
try:
254-
df, a_dic, topspin_title, processed_data_shape = nmr_utils.read_bruker_1d(
255-
os.path.join(directory_location, name),
256-
process_number=self.data["selected_process"],
257-
verbose=False,
258-
)
259-
except Exception as error:
260-
LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}")
261-
return
262-
263-
serialized_df = df.to_dict() if (df is not None) else None
264-
265-
# all data sorted in a fairly raw way
266-
self.data["processed_data"] = serialized_df
267-
self.data["acquisition_parameters"] = a_dic["acqus"]
268-
self.data["processing_parameters"] = a_dic["procs"]
269-
self.data["pulse_program"] = a_dic["pprog"]
270-
271-
# specific things that we might want to pull out for the UI:
272-
self.data["available_processes"] = available_processes
273-
self.data["nucleus"] = a_dic["acqus"]["NUC1"]
274-
self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"]
275-
self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"]
276-
self.data["recycle_delay"] = a_dic["acqus"]["D"][1]
277-
self.data["nscans"] = a_dic["acqus"]["NS"]
278-
self.data["CNST31"] = a_dic["acqus"]["CNST"][31]
279-
self.data["processed_data_shape"] = processed_data_shape
280-
281-
self.data["probe_name"] = a_dic["acqus"]["PROBHD"]
282-
self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"]
283-
self.data["topspin_title"] = topspin_title
284-
285-
def generate_nmr_plot(self):
286-
self.read_bruker_nmr_data() # currently calls every time plotting happens, but it should only happen if the file was updated
287-
if "processed_data" not in self.data or not self.data["processed_data"]:
288-
self.data["bokeh_plot_data"] = None
289-
return
290-
291-
df = pd.DataFrame(self.data["processed_data"])
292-
df["normalized intensity"] = df.intensity / df.intensity.max()
293-
294-
bokeh_layout = selectable_axes_plot(
295-
df,
296-
x_options=["ppm", "hz"],
297-
y_options=[
298-
"intensity",
299-
"intensity_per_scan",
300-
"normalized intensity",
301-
],
302-
plot_line=True,
303-
point_size=3,
304-
)
305-
bokeh_layout.children[0].x_range.flipped = True # flip x axis, per NMR convention
306-
307-
self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme)

0 commit comments

Comments
 (0)