
Commit 46b66a1

Add dumper/loader for basic yaml (lists of lists) (#11)
1 parent ee2a063 commit 46b66a1

File tree

9 files changed: +465 -122 lines changed


poetry.lock

+237 -122

Some generated files are not rendered by default.

pyproject.toml

+5
@@ -38,6 +38,11 @@ docs = [
     "myst-parser"
 ]
 
+[tool.poetry.group.dev.dependencies]
+black = "^24.1.1"
+pytest = "^7.1.2"
+mypy = "^1.8.0"
+
 [tool.poetry-dynamic-versioning]
 enable = true
 vcs = "git"

src/linkml_arrays/dumpers/__init__.py

+2
@@ -1,12 +1,14 @@
 """Dumper classes for linkml-arrays."""
 
 from .hdf5_dumper import Hdf5Dumper
+from .yaml_dumper import YamlDumper
 from .yaml_hdf5_dumper import YamlHdf5Dumper
 from .yaml_numpy_dumper import YamlNumpyDumper
 from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper
 
 __all__ = [
     "Hdf5Dumper",
+    "YamlDumper",
     "YamlHdf5Dumper",
     "YamlNumpyDumper",
     "ZarrDirectoryStoreDumper",

src/linkml_arrays/dumpers/yaml_dumper.py

+58
@@ -0,0 +1,58 @@
+"""Class for dumping a LinkML model to a YAML file."""
+
+from typing import Union
+
+import yaml
+from linkml_runtime import SchemaView
+from linkml_runtime.dumpers.dumper_root import Dumper
+from linkml_runtime.utils.yamlutils import YAMLRoot
+from pydantic import BaseModel
+
+
+def _iterate_element(
+    element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
+):
+    """Recursively iterate through the elements of a LinkML model and save them.
+
+    Returns a dictionary with the same structure as the input element, but with the slots
+    that implement "linkml:elements" (arrays) written as lists or lists of lists.
+
+    Raises:
+        ValueError: If the class requires an identifier and it is not provided.
+    """
+    # get the type of the element
+    element_type = type(element).__name__
+
+    # ask schemaview whether it has a class by this name
+    found_class = schemaview.get_class(element_type)
+
+    id_slot = schemaview.get_identifier_slot(found_class.name)
+    if id_slot is not None:
+        id_value = getattr(element, id_slot.name)
+    else:
+        id_value = None
+
+    ret_dict = dict()
+    for k, v in vars(element).items():
+        found_slot = schemaview.induced_slot(k, element_type)
+        if "linkml:elements" in found_slot.implements:
+            if id_slot is None and parent_identifier is None:
+                raise ValueError("The class requires an identifier.")
+            ret_dict[k] = v.tolist()
+        else:
+            if isinstance(v, BaseModel):
+                v2 = _iterate_element(v, schemaview, id_value)
+                ret_dict[k] = v2
+            else:
+                ret_dict[k] = v
+    return ret_dict
+
+
+class YamlDumper(Dumper):
+    """Dumper class for LinkML models to YAML files."""
+
+    def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
+        """Return element formatted as a YAML string."""
+        input = _iterate_element(element, schemaview)
+
+        return yaml.dump(input)
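
A minimal usage sketch for the new dumper (the LatitudeSeries/TemperatureDataset/etc. Pydantic classes and the temperature_dataset.yaml schema are taken from the test suite further below; their import path is omitted here, and the schema path is assumed):

    import numpy as np
    from linkml_runtime import SchemaView
    from linkml_arrays.dumpers import YamlDumper

    # LatitudeSeries, LongitudeSeries, DaySeries, TemperatureMatrix, and
    # TemperatureDataset are the Pydantic classes generated from the test schema.
    schemaview = SchemaView("tests/input/temperature_dataset.yaml")  # path assumed
    dataset = TemperatureDataset(
        name="my_temperature",
        latitude_in_deg=LatitudeSeries(values=np.array([1, 2])),
        longitude_in_deg=LongitudeSeries(values=np.array([4, 5])),
        time_in_d=DaySeries(values=np.array([7, 8])),
        temperatures_in_K=TemperatureMatrix(values=np.arange(8).reshape((2, 2, 2))),
    )
    yaml_str = YamlDumper().dumps(dataset, schemaview=schemaview)  # arrays become nested lists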

src/linkml_arrays/loaders/__init__.py

+2
@@ -2,12 +2,14 @@
 
 from .hdf5_loader import Hdf5Loader
 from .yaml_hdf5_loader import YamlHdf5Loader
+from .yaml_loader import YamlLoader
 from .yaml_numpy_loader import YamlNumpyLoader
 from .zarr_directory_store_loader import ZarrDirectoryStoreLoader
 
 __all__ = [
     "Hdf5Loader",
     "YamlHdf5Loader",
+    "YamlLoader",
     "YamlNumpyLoader",
     "ZarrDirectoryStoreLoader",
 ]

src/linkml_arrays/loaders/yaml_loader.py

+60
@@ -0,0 +1,60 @@
+"""Class for loading a LinkML model from a YAML file."""
+
+from typing import Type, Union
+
+import numpy as np
+import yaml
+from linkml_runtime import SchemaView
+from linkml_runtime.linkml_model import ClassDefinition
+from linkml_runtime.loaders.loader_root import Loader
+from linkml_runtime.utils.yamlutils import YAMLRoot
+from pydantic import BaseModel
+
+
+def _iterate_element(
+    input_dict: dict, element_type: ClassDefinition, schemaview: SchemaView
+) -> dict:
+    """Recursively iterate through the elements of a LinkML model and load them into a dict.
+
+    Datasets are loaded into NumPy arrays.
+    """
+    ret_dict = dict()
+    for k, v in input_dict.items():
+        found_slot = schemaview.induced_slot(k, element_type.name)
+        if "linkml:elements" in found_slot.implements:
+            v = np.asarray(v)
+        elif isinstance(v, dict):
+            found_slot_range = schemaview.get_class(found_slot.range)
+            v = _iterate_element(v, found_slot_range, schemaview)
+        # else: do not transform v
+        ret_dict[k] = v
+
+    return ret_dict
+
+
+class YamlLoader(Loader):
+    """Class for loading a LinkML model from a YAML file."""
+
+    def load_any(self, source: str, **kwargs):
+        """Create an instance of the target class from a YAML file."""
+        return self.load(source, **kwargs)
+
+    def loads(self, source: str, **kwargs):
+        """Create an instance of the target class from a YAML file."""
+        return self.load(source, **kwargs)
+
+    def load(
+        self,
+        source: str,
+        target_class: Type[Union[YAMLRoot, BaseModel]],
+        schemaview: SchemaView,
+        **kwargs,
+    ):
+        """Create an instance of the target class from a YAML file."""
+        input_dict = yaml.safe_load(source)
+
+        element_type = schemaview.get_class(target_class.__name__)
+        element = _iterate_element(input_dict, element_type, schemaview)
+        obj = target_class(**element)
+
+        return obj
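
A corresponding loader sketch (same assumptions as the dumper example above; the fixture path mirrors the loader test and is assumed):

    from linkml_arrays.loaders import YamlLoader

    with open("tests/input/temperature_dataset_yaml.yaml") as f:  # path assumed
        yaml_str = f.read()
    dataset = YamlLoader().loads(
        yaml_str, target_class=TemperatureDataset, schemaview=schemaview
    )
    # slots that implement "linkml:elements" come back as NumPy arrays
    assert dataset.temperatures_in_K.values.shape == (2, 2, 2)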

tests/input/temperature_dataset_yaml.yaml

+23
@@ -0,0 +1,23 @@
+latitude_in_deg:
+  values:
+  - 1
+  - 2
+longitude_in_deg:
+  values:
+  - 4
+  - 5
+name: my_temperature
+temperatures_in_K:
+  values:
+  - - - 0
+      - 1
+    - - 2
+      - 3
+  - - - 4
+      - 5
+    - - 6
+      - 7
+time_in_d:
+  values:
+  - 7
+  - 8
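
The nested block sequences under temperatures_in_K encode the 2x2x2 array written by the dumper test (np.arange(8).reshape((2, 2, 2))). A quick check of that structure, assuming the fixture lives at tests/input/ as the loader test suggests:

    import numpy as np
    import yaml

    with open("tests/input/temperature_dataset_yaml.yaml") as f:  # path assumed
        data = yaml.safe_load(f)
    arr = np.asarray(data["temperatures_in_K"]["values"])
    assert arr.shape == (2, 2, 2)
    assert arr[1, 0, 1] == 5  # nesting order: outer blocks -> rows -> values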

tests/test_dumpers/test_dumpers.py

+50

@@ -9,6 +9,7 @@
 
 from linkml_arrays.dumpers import (
     Hdf5Dumper,
+    YamlDumper,
     YamlHdf5Dumper,
     YamlNumpyDumper,
     ZarrDirectoryStoreDumper,
@@ -22,6 +23,55 @@
 )
 
 
+class YamlDumpersTestCase(unittest.TestCase):
+    """Test dumping of pydantic-style classes from LinkML schemas into YAML files."""
+
+    def test_dump_pydantic_arrays(self):
+        """Test dumping pydantic classes with numpy arrays to a YAML file."""
+        latitude_in_deg = LatitudeSeries(values=np.array([1, 2]))
+        longitude_in_deg = LongitudeSeries(values=np.array([4, 5]))
+        time_in_d = DaySeries(values=np.array([7, 8]))
+        temperatures_in_K = TemperatureMatrix(
+            values=np.arange(8).reshape((2, 2, 2)),
+        )
+        temperature = TemperatureDataset(
+            name="my_temperature",
+            latitude_in_deg=latitude_in_deg,
+            longitude_in_deg=longitude_in_deg,
+            time_in_d=time_in_d,
+            temperatures_in_K=temperatures_in_K,
+        )
+
+        schemaview = SchemaView(Path(__file__) / "../../input/temperature_dataset.yaml")
+        ret = YamlDumper().dumps(temperature, schemaview=schemaview)
+
+        expected = """latitude_in_deg:
+  values:
+  - 1
+  - 2
+longitude_in_deg:
+  values:
+  - 4
+  - 5
+name: my_temperature
+temperatures_in_K:
+  values:
+  - - - 0
+      - 1
+    - - 2
+      - 3
+  - - - 4
+      - 5
+    - - 6
+      - 7
+time_in_d:
+  values:
+  - 7
+  - 8
+"""
+        assert ret == expected
+
+
 class YamlNumpyDumpersTestCase(unittest.TestCase):
     """Test dumping of pydantic-style classes from LinkML schemas into YAML + NumPy files."""

tests/test_loaders/test_loaders.py

+28

@@ -10,6 +10,7 @@
 from linkml_arrays.loaders import (
     Hdf5Loader,
     YamlHdf5Loader,
+    YamlLoader,
     YamlNumpyLoader,
     ZarrDirectoryStoreLoader,
 )
@@ -22,6 +23,33 @@
 )
 
 
+class YamlLoadersTestCase(unittest.TestCase):
+    """Test loading of pydantic-style classes from YAML arrays."""
+
+    def test_load_pydantic_arrays(self):
+        """Test loading of pydantic-style classes from YAML arrays."""
+        read_yaml = hbread(
+            "temperature_dataset_yaml.yaml", base_path=str(Path(__file__) / "../../input")
+        )
+        schemaview = SchemaView(Path(__file__) / "../../input/temperature_dataset.yaml")
+        ret = YamlLoader().loads(read_yaml, target_class=TemperatureDataset, schemaview=schemaview)
+
+        assert isinstance(ret, TemperatureDataset)
+        assert ret.name == "my_temperature"
+
+        assert isinstance(ret.latitude_in_deg, LatitudeSeries)
+        np.testing.assert_array_equal(ret.latitude_in_deg.values, np.array([1, 2]))
+
+        assert isinstance(ret.longitude_in_deg, LongitudeSeries)
+        np.testing.assert_array_equal(ret.longitude_in_deg.values, np.array([4, 5]))
+
+        assert isinstance(ret.time_in_d, DaySeries)
+        np.testing.assert_array_equal(ret.time_in_d.values, np.array([7, 8]))
+
+        assert isinstance(ret.temperatures_in_K, TemperatureMatrix)
+        np.testing.assert_array_equal(ret.temperatures_in_K.values, np.arange(8).reshape((2, 2, 2)))
+
+
 class YamlNumpyLoadersTestCase(unittest.TestCase):
     """Test loading of pydantic-style classes from YAML + Numpy arrays."""