diff --git a/CHANGELOG.md b/CHANGELOG.md index eca03ad8..b9f3e9c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Keep it human-readable, your future self will thank you! ### Added - Add anemoi-transform link to documentation +- Various bug fixes - Control compatibility check in xy/zip - Add `merge` feature diff --git a/src/anemoi/datasets/create/__init__.py b/src/anemoi/datasets/create/__init__.py index 14cf5240..85abac76 100644 --- a/src/anemoi/datasets/create/__init__.py +++ b/src/anemoi/datasets/create/__init__.py @@ -412,7 +412,24 @@ def _run(self): metadata.update(self.main_config.get("add_metadata", {})) metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict() - metadata["recipe"] = sanitise(self.main_config.get_serialisable_dict()) + + recipe = sanitise(self.main_config.get_serialisable_dict()) + + # Remove stuff added by prepml + for k in [ + "build_dataset", + "config_format_version", + "config_path", + "dataset_status", + "ecflow", + "metadata", + "platform", + "reading_chunks", + "upload", + ]: + recipe.pop(k, None) + + metadata["recipe"] = recipe metadata["description"] = self.main_config.description metadata["licence"] = self.main_config["licence"] diff --git a/src/anemoi/datasets/create/functions/filters/rename.py b/src/anemoi/datasets/create/functions/filters/rename.py index b82fd8ca..666e085d 100644 --- a/src/anemoi/datasets/create/functions/filters/rename.py +++ b/src/anemoi/datasets/create/functions/filters/rename.py @@ -56,11 +56,14 @@ def __init__(self, field, format): self.format = format self.bits = re.findall(r"{(\w+)}", format) - def metadata(self, key, **kwargs): - value = self.field.metadata(key, **kwargs) - if "{" + key + "}" in self.format: - bits = {b: self.field.metadata(b, **kwargs) for b in self.bits} - return self.format.format(**bits) + def metadata(self, *args, **kwargs): + value = self.field.metadata(*args, **kwargs) + if args: + assert len(args) == 1 + key = args[0] + if "{" + key + "}" in self.format: + bits = {b: self.field.metadata(b, **kwargs) for b in self.bits} + return self.format.format(**bits) return value def __getattr__(self, name): diff --git a/src/anemoi/datasets/create/functions/sources/accumulations.py b/src/anemoi/datasets/create/functions/sources/accumulations.py index b74eb33f..bfc5eba7 100644 --- a/src/anemoi/datasets/create/functions/sources/accumulations.py +++ b/src/anemoi/datasets/create/functions/sources/accumulations.py @@ -370,12 +370,15 @@ def accumulations(context, dates, **request): user_accumulation_period = request.pop("accumulation_period", 6) + # If `data_accumulation_period` is not set, this means that the accumulations are from the start + # of the forecast. + KWARGS = { ("od", "oper"): dict(patch=_scda), ("od", "elda"): dict(base_times=(6, 18)), ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)), ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)), - ("rr", "oper"): dict(data_accumulation_period=3, base_times=(0, 3, 6, 9, 12, 15, 18, 21)), + ("rr", "oper"): dict(base_times=(0, 3, 6, 9, 12, 15, 18, 21)), ("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)), } diff --git a/src/anemoi/datasets/create/input/__init__.py b/src/anemoi/datasets/create/input/__init__.py index d23f038d..92e93669 100644 --- a/src/anemoi/datasets/create/input/__init__.py +++ b/src/anemoi/datasets/create/input/__init__.py @@ -6,23 +6,9 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. # -import datetime -import itertools + import logging -import math -import time -from collections import defaultdict from copy import deepcopy -from functools import cached_property -from functools import wraps - -import numpy as np -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField from .trace import trace_select diff --git a/src/anemoi/datasets/create/input/result.py b/src/anemoi/datasets/create/input/result.py index 7fab9acf..f708ef9c 100644 --- a/src/anemoi/datasets/create/input/result.py +++ b/src/anemoi/datasets/create/input/result.py @@ -33,9 +33,38 @@ def _fields_metatata(variables, cube): assert isinstance(variables, tuple), variables + def _merge(md1, md2): + assert set(md1.keys()) == set(md2.keys()), (set(md1.keys()), set(md2.keys())) + result = {} + for k in md1.keys(): + v1 = md1[k] + v2 = md2[k] + + if v1 == v2: + result[k] = v1 + continue + + if isinstance(v1, list): + assert v2 not in v1, (v1, v2) + result[k] = sorted(v1 + [v2]) + continue + + if isinstance(v2, list): + assert v1 not in v2, (v1, v2) + result[k] = sorted(v2 + [v1]) + continue + + result[k] = sorted([v1, v2]) + + return result + result = {} - for i, c in enumerate(cube.iterate_cubelets()): - assert c._coords_names[1] == variables[i], (c._coords_names[1], variables[i]) + i = -1 + for c in cube.iterate_cubelets(): + + if i == -1 or c._coords_names[1] != variables[i]: + i += 1 + f = cube[c.coords] md = f.metadata(namespace="mars") if not md: @@ -49,7 +78,10 @@ def _fields_metatata(variables, cube): md["param"] = str(f.metadata("paramId", default="unknown")) # assert md['param'] != 'unknown', (md, f.metadata('param')) - result[variables[i]] = md + if variables[i] in result: + result[variables[i]] = _merge(md, result[variables[i]]) + else: + result[variables[i]] = md assert i + 1 == len(variables), (i + 1, len(variables)) return result diff --git a/src/anemoi/datasets/create/input/step.py b/src/anemoi/datasets/create/input/step.py index 3eb2917c..daca578b 100644 --- a/src/anemoi/datasets/create/input/step.py +++ b/src/anemoi/datasets/create/input/step.py @@ -59,6 +59,7 @@ def select(self, group_of_dates): ) def __repr__(self): + # raise NotImplementedError(f"Not implemented in {self.__class__.__name__}") return super().__repr__(self.previous_step, _inline_=str(self.kwargs))