diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d808e7f..4acca083 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,16 @@ Keep it human-readable, your future self will thank you! ## [Unreleased](https://github.com/ecmwf/anemoi-datasets/compare/0.5.8...HEAD) +### Changed + +- Fix metadata serialization handling of numpy.integer (#140) +- Fix cutout slicing of grid dimension (#145) ### Added - Call filters from anemoi-transform - make test optional when adls is not installed Pull request #110 - ## [0.5.8](https://github.com/ecmwf/anemoi-datasets/compare/0.5.7...0.5.8) - 2024-10-26 ### Changed diff --git a/src/anemoi/datasets/data/dataset.py b/src/anemoi/datasets/data/dataset.py index 8ab06757..185cf297 100644 --- a/src/anemoi/datasets/data/dataset.py +++ b/src/anemoi/datasets/data/dataset.py @@ -15,6 +15,7 @@ import warnings from functools import cached_property +import numpy as np from anemoi.utils.dates import frequency_to_seconds from anemoi.utils.dates import frequency_to_string from anemoi.utils.dates import frequency_to_timedelta @@ -42,6 +43,9 @@ def _tidy(v): if isinstance(v, slice): return (v.start, v.stop, v.step) + if isinstance(v, np.integer): + return int(v) + return v @@ -241,7 +245,8 @@ def _drop_to_columns(self, vars): if not isinstance(vars, (list, tuple, set)): vars = [vars] - assert set(vars) <= set(self.name_to_index) + if not set(vars) <= set(self.name_to_index): + raise ValueError(f"drop: unknown variables: {set(vars) - set(self.name_to_index)}") return sorted([v for k, v in self.name_to_index.items() if k not in vars]) diff --git a/src/anemoi/datasets/data/grids.py b/src/anemoi/datasets/data/grids.py index 15915f13..df5ed4b2 100644 --- a/src/anemoi/datasets/data/grids.py +++ b/src/anemoi/datasets/data/grids.py @@ -289,14 +289,15 @@ def _get_tuple(self, index): """ index, changes = index_to_slices(index, self.shape) # Select data from each LAM - lam_data = [lam[index] for lam in self.lams] + lam_data = [lam[index[:3]] for lam in 
self.lams] # First apply spatial indexing on `self.globe` and then apply the mask globe_data_sliced = self.globe[index[:3]] globe_data = globe_data_sliced[..., self.global_mask] - # Concatenate LAM data with global data - result = np.concatenate(lam_data + [globe_data], axis=self.axis) + # Concatenate LAM data with global data, apply the grid slicing + result = np.concatenate(lam_data + [globe_data], axis=self.axis)[..., index[3]] + return apply_index_to_slices_changes(result, changes) def collect_supporting_arrays(self, collected, *path): @@ -324,7 +325,8 @@ def shape(self): """ shapes = [np.sum(mask) for mask in self.masks] global_shape = np.sum(self.global_mask) - return tuple(self.lams[0].shape[:-1] + (sum(shapes) + global_shape,)) + total_shape = sum(shapes) + global_shape + return tuple(self.lams[0].shape[:-1] + (int(total_shape),)) def check_same_resolution(self, d1, d2): # Turned off because we are combining different resolutions diff --git a/src/anemoi/datasets/data/join.py b/src/anemoi/datasets/data/join.py index 3f97c38a..6b7de3e6 100644 --- a/src/anemoi/datasets/data/join.py +++ b/src/anemoi/datasets/data/join.py @@ -124,7 +124,14 @@ def variables_metadata(self): if v in md: result[v] = md[v] - assert len(result) == len(variables), (result, variables) + if len(result) != len(variables): + LOG.error("Some variables are missing metadata.") + for v in variables: + if v not in result: + LOG.error("Missing metadata for %r.", v) + + raise ValueError("Some variables are missing metadata.") + return result @cached_property