Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
130 commits
Select commit Hold shift + click to select a range
5971cbd
feat: accumulation from grib-index
b8raoult May 7, 2025
e6e9e0b
make accumulation action
flyIchtus May 6, 2025
2479364
fix bugs, working with mars
flyIchtus May 7, 2025
03af2ed
fix: correct gribindex retrieval and backward-looking period
flyIchtus May 9, 2025
2b23336
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 12, 2025
f5caf0a
fix grib-index
flyIchtus May 14, 2025
85b01ee
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 14, 2025
bee533e
feat : simplify (validity always at end of period) + clean
flyIchtus May 16, 2025
78322ed
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 16, 2025
a6bc6e2
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus May 16, 2025
8e7e90a
add tests for accumulation from mars and grib-idnex
flyIchtus May 23, 2025
22184c0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 23, 2025
adc72ca
fix typo
flyIchtus May 23, 2025
063e6f2
fix: simplify tests
flyIchtus May 23, 2025
4254d97
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus May 23, 2025
8eea944
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 23, 2025
a7ddabe
fix: sources testing
flyIchtus Jun 2, 2025
b496b8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 2, 2025
2ef7d0f
fix requests/kwargs for grib-index
flyIchtus Jun 2, 2025
be82aca
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 2, 2025
c2ac4c0
fix: tests passing
flyIchtus Jun 2, 2025
e9d89d0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 2, 2025
f3aba94
fix: docsig, ruff + make era accum test pass
flyIchtus Jun 2, 2025
74745cb
lint
flyIchtus Jun 2, 2025
e018332
lint with ruff
flyIchtus Jun 2, 2025
de33681
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 2, 2025
4fbb1fe
faster and shorter test
flyIchtus Jun 3, 2025
b409bee
remove lines
flyIchtus Jun 3, 2025
30647cd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 3, 2025
81e7f83
fix typo in data_accumulation
flyIchtus Jun 3, 2025
eb0aaca
shorten test
flyIchtus Jun 3, 2025
907a768
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 3, 2025
53811b5
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Jun 17, 2025
29d9232
fix: typos + grib_index 3.9 syntax
flyIchtus Jun 18, 2025
2bf9b99
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 18, 2025
d4d57da
fix: typos in test
flyIchtus Jun 18, 2025
2c27c2c
chore: remove need to encode grib tmpfile
flyIchtus Jun 18, 2025
ea7b6ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 18, 2025
81950a3
fix: anemoi utils import
flyIchtus Jun 18, 2025
0eed99c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 18, 2025
2bf7654
fix: imports
flyIchtus Jun 30, 2025
37c68eb
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Jun 30, 2025
42123b4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 30, 2025
ae72d19
add tests for accumulation from mars and grib-idnex
flyIchtus May 23, 2025
b0b6413
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 23, 2025
c842d63
fix: simplify tests
flyIchtus May 23, 2025
894e713
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 23, 2025
740eda4
fix Accumulator call
flyIchtus Jun 30, 2025
85a3864
remove dependency on template grib
flyIchtus Jun 30, 2025
226e6f7
fix typos
flyIchtus Jun 30, 2025
5538994
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 30, 2025
6e7849a
fix bad rebase
flyIchtus Jun 30, 2025
f81bdc1
lint
flyIchtus Jun 30, 2025
23dd882
fix mars accum
flyIchtus Jun 30, 2025
a839aef
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 30, 2025
4da11c9
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Aug 20, 2025
0fda5fc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 20, 2025
69c7b68
fix conflicts on exit statement
flyIchtus Aug 20, 2025
d689e75
remove typing errors and from old accumulation code
flyIchtus Aug 20, 2025
984c0a4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 20, 2025
075151f
fix typing in mars.py
flyIchtus Aug 20, 2025
d5ee607
simplify checking logic
flyIchtus Aug 21, 2025
54506f3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 21, 2025
dcbc5b0
isolating pieces of code and commenting/typing
flyIchtus Aug 21, 2025
f1a06a9
Doc + docstrings + simplifying defaultTimelines
flyIchtus Aug 22, 2025
e977566
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2025
35ceb73
fix docs
flyIchtus Aug 22, 2025
468a562
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2025
c5e9c6d
fix import
flyIchtus Aug 22, 2025
5938e9a
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Aug 22, 2025
6790a1f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2025
2c54b06
fix dosctring
flyIchtus Aug 22, 2025
2bb8607
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2025
12b3d1c
fix docstring
flyIchtus Aug 22, 2025
a06cb83
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2025
3b37c5a
fix typing and docstrings
flyIchtus Sep 10, 2025
239c27a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 10, 2025
74a67cc
beginning accums on RrOper
flyIchtus Sep 12, 2025
167591f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 12, 2025
65a5f8d
add Rroper forecasts
flyIchtus Sep 12, 2025
a404f6a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 12, 2025
a43c764
fixes and cleaning up
flyIchtus Sep 12, 2025
8fb0a16
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 12, 2025
0e80827
Merge remote-tracking branch 'origin/main' into feat/abstracting_accu…
b8raoult Oct 22, 2025
b6ca115
rename file
b8raoult Oct 22, 2025
490e1c6
update
b8raoult Oct 22, 2025
f3de368
Merge remote-tracking branch 'origin/main' into feat/abstracting_accu…
b8raoult Oct 23, 2025
b8a9c48
update
b8raoult Oct 23, 2025
c485dd3
hack for accumulation. wip
floriankrb Oct 23, 2025
9a7219a
adapt source syntax and test to validate recipes
flyIchtus Oct 23, 2025
71ae4ae
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 23, 2025
358c1dd
adjust source kwargs
flyIchtus Oct 23, 2025
a895067
feed grib_index 'indexdb' into source
flyIchtus Oct 24, 2025
c0bca93
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2025
709327a
feat: review datasets creation tests
b8raoult Nov 3, 2025
92760e2
feat: review datasets creation tests
b8raoult Nov 3, 2025
7bc982b
update
b8raoult Nov 3, 2025
efcd3ba
update
b8raoult Nov 3, 2025
c675df6
update
b8raoult Nov 3, 2025
a286e77
renaming and removing TodoList
flyIchtus Nov 3, 2025
805783a
add repeated dates
b8raoult Nov 5, 2025
062b6cb
add repeated dates
b8raoult Nov 5, 2025
998a3bb
add repeated dates
b8raoult Nov 5, 2025
633f984
update
b8raoult Nov 6, 2025
e7c51b2
added more extensive tests
floriankrb Nov 12, 2025
4c42d0f
more tests (do not pass)
floriankrb Nov 12, 2025
781e328
fix ear5 accumulate. + breaking change in the recipe for accumlate: ...
floriankrb Nov 13, 2025
c5d2281
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2025
771629b
up
floriankrb Nov 13, 2025
71a539b
wip
floriankrb Nov 13, 2025
a4fba71
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2025
e4f894d
fixing grib-index
flyIchtus Nov 17, 2025
d37e2b6
remove 'shit_time_request' keyword
flyIchtus Nov 17, 2025
5930d76
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Nov 17, 2025
f78df25
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 17, 2025
9d2f9d4
fix improt
flyIchtus Nov 17, 2025
c42c17d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 17, 2025
f12b5f9
adapt test files
flyIchtus Nov 17, 2025
e4bb813
remove spurious test accumulate;yaml
flyIchtus Nov 17, 2025
cda0043
Merge branch 'main' into feat/abstracting_accumulation
flyIchtus Nov 21, 2025
9927cee
fix user_time in mars requests
flyIchtus Nov 21, 2025
4a11c5a
fixing tests related to accumulations
flyIchtus Nov 21, 2025
11d8f2e
passing tests with accumulation
flyIchtus Nov 21, 2025
481abe5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2025
9b1c6e8
remove s3 winds test
flyIchtus Nov 21, 2025
508abc0
cleanup
flyIchtus Nov 21, 2025
d268fdd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2025
caf8f34
added 24h era test recipe
floriankrb Nov 26, 2025
6f62cf1
added failing test
floriankrb Nov 26, 2025
d3c5c66
more tests for accumulation. era-1-enda passes.
floriankrb Nov 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions docs/howtos/create/05-create-accumulations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
.. _create-accumulations:

##########################################
Create a dataset with accumulated fields
##########################################

Many fields come as accumulations over time, e.g. `tp` (total
precipitation), `sd` (snow depth) or `ssr` (surface shortwave
radiation). Given one dataset, one may want to accumulate some of its
fields over specific periods of time.

This depends on the data's native format. For an accumulated field (say
`tp` for simplicity), one needs to know:

- the `accumulation_period` over which to accumulate (e.g. 6h).
- the desired `validity_time` at which accumulation stops and for which
the value is valid.
- the `data_accumulation_period`, that is the duration over which the
data is already accumulated.

The resulting field is then "`tp` accumulated over `accumulation_period`
hours up to `validity_time`". In a common case, the dataset features,
e.g., 1h-accumulated `tp` at a 1-hour frequency, and each raw file
contains `tp` accumulated over the *last* hour. Obtaining 6h-accumulated
`tp` then consists of taking the 6 files before (and including)
`validity_time` and summing their fields.

The resulting accumulated field can be treated as a normal anemoi
`source` in recipes (e.g, filters can be applied to the source).

Note that, depending on how your native dataset is built (e.g., your
native files feature the accumulation over the *next* hour), the
calculation can be very different. See the "Subtleties" section below
and the associated recipes.

***************************************
Using accumulations in recipes : mars
***************************************

In the example below we see recipes to create accumulations from MARS
data. To keep older recipes working, there are two equivalent ways to do
so. The first one is a generic way working for MARS and grib-index
sources.

.. literalinclude:: yaml/recipe-accumulate-era.yaml

That recipe will generate the following dataset:

.. code:: bash

📦 Path : recipe-accumulate.zarr
🔢 Format version: 0.30.0

📅 Start : 2021-01-10 18:00
📅 End : 2021-01-12 12:00
⏰ Frequency : 6h
🚫 Missing : 0
🌎 Resolution : 20.0
🌎 Field shape: [162]

📐 Shape : 8 × 2 × 1 × 162 (10.1 KiB)
💽 Size : 23.2 KiB (23.2 KiB)
📁 Files : 52

Index │ Variable │ Min │ Max │ Mean │ Stdev
──────┼──────────┼─────┼───────────┼─────────────┼───────────
0 │ cp │ 0 │ 0.0110734 │ 0.000244731 │ 0.00103593
1 │ tp │ 0 │ 0.0333021 │ 0.00058075 │ 0.00210331
──────┴──────────┴─────┴───────────┴─────────────┴───────────
🔋 Dataset ready, last update 26 seconds ago.
📊 Statistics ready.

The "legacy" way to do so is the following (the syntax is only slightly
different):

.. literalinclude:: yaml/recipe-accumulation-era.yaml

The resulting dataset is:

.. code:: bash

📦 Path : recipe-accumulation.zarr
🔢 Format version: 0.30.0

📅 Start : 2021-01-10 18:00
📅 End : 2021-01-12 12:00
⏰ Frequency : 6h
🚫 Missing : 0
🌎 Resolution : 20.0
🌎 Field shape: [9, 18]

📐 Shape : 8 × 2 × 1 × 162 (10.1 KiB)
💽 Size : 22.2 KiB (22.2 KiB)
📁 Files : 52

Index │ Variable │ Min │ Max │ Mean │ Stdev
──────┼──────────┼─────┼───────────┼─────────────┼───────────
0 │ cp │ 0 │ 0.0110734 │ 0.000244739 │ 0.00103593
1 │ tp │ 0 │ 0.0333023 │ 0.000580769 │ 0.00210332
──────┴──────────┴─────┴───────────┴─────────────┴───────────
🔋 Dataset ready, last update 3 minutes ago.
📊 Statistics ready.

Note that the statistics for the two datasets are equal up to `1e-6`;
this is due to rounding errors that can accumulate. Larger discrepancies
are a sign that something might be wrong.

*********************************************
Using accumulations in recipes : grib files
*********************************************

If your data source is grib files, you can use a grib-index as a source.
First create a `grib-index
<https://anemoi.readthedocs.io/projects/datasets/en/latest/howtos/create/01-grib-data.html#using-an-index-file>`_,
which builds a database used to query fields. Then, say we want to
accumulate 3h-data over 6h:

.. literalinclude:: yaml/recipe-accumulate-gribindex.yaml

Note that we also added a filter at the end of the recipe to rename `tp`
to `tp_6h`. The frequency of the dataset is `1h`, so the accumulation is
a moving window.

************
Subtleties
************

Some datasets (such as ERA5) feature
17 changes: 17 additions & 0 deletions docs/howtos/create/yaml/recipe-accumulate-era.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
dates:
start: 2021-01-10 18:00:00
end: 2021-01-12 12:00:00
frequency: 6h

input:
accumulate:
source:
mars:
expver: "0001"
class: ea
stream: oper
grid: 20./20.
levtype: sfc
param: [ tp, cp]
accumulation_period: 6
data_accumulation_period: 6 # this argument will be ignored because of ERA-like accumulations
18 changes: 18 additions & 0 deletions docs/howtos/create/yaml/recipe-accumulate-gribindex.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
dates:
end: 2020-01-06 12:00:00+00:00
start: 2020-01-03 06:00:00+00:00
frequency: 1h

input:
pipe:
- accumulate:
source:
grib-index:
indexdb: /path/to/gribindex.db
levtype: sfc
param:
- tp
accumulation_period: 6
data_accumulation_period: 3
- rename:
tp: tp_6h
14 changes: 14 additions & 0 deletions docs/howtos/create/yaml/recipe-accumulation-era.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
dates:
start: 2021-01-10 18:00:00
end: 2021-01-12 12:00:00
frequency: 6h

input:
accumulations:
expver: "0001"
class: ea
stream: oper
grid: 20./20.
levtype: sfc
param: [ tp, cp]
accumulation_period: 6
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ dependencies = [
"jsonschema",
"numcodecs<0.16", # Until we move to zarr3
"numpy",
"pytest>=8.4.1",
"pytest-xdist>=3.7",
"pyyaml",
"ruamel-yaml",
"semantic-version",
Expand Down
16 changes: 4 additions & 12 deletions src/anemoi/datasets/create/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ def check_name(
resolution: str,
dates: list[datetime.datetime],
frequency: datetime.timedelta,
raise_exception: bool = True,
is_test: bool = False,
raise_exception: bool = False,
) -> None:
"""Check the name of the dataset.

Expand All @@ -271,15 +270,13 @@ def check_name(
The frequency of the dataset.
raise_exception : bool, optional
Whether to raise an exception if the name is invalid.
is_test : bool, optional
Whether this is a test.
"""
basename, _ = os.path.splitext(os.path.basename(self.path))
try:
DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
except Exception as e:
if raise_exception and not is_test:
raise e
if raise_exception:
raise
else:
LOG.warning(f"Dataset name error: {e}")

Expand Down Expand Up @@ -577,7 +574,6 @@ def __init__(
use_threads: bool = False,
statistics_temp_dir: str | None = None,
progress: Any = None,
test: bool = False,
cache: str | None = None,
**kwargs: Any,
):
Expand All @@ -599,8 +595,6 @@ def __init__(
The directory for temporary statistics.
progress : Any, optional
The progress indicator.
test : bool, optional
Whether this is a test.
cache : Optional[str], optional
The cache directory.
"""
Expand All @@ -613,9 +607,8 @@ def __init__(
self.use_threads = use_threads
self.statistics_temp_dir = statistics_temp_dir
self.progress = progress
self.test = test

self.main_config = loader_config(config, is_test=test)
self.main_config = loader_config(config)

# self.registry.delete() ??
self.tmp_statistics.delete()
Expand Down Expand Up @@ -748,7 +741,6 @@ def _run(self) -> int:

self.dataset.check_name(
raise_exception=self.check_name,
is_test=self.test,
resolution=resolution,
dates=dates,
frequency=frequency,
Expand Down
59 changes: 4 additions & 55 deletions src/anemoi/datasets/create/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
from anemoi.utils.config import load_any_dict_format
from earthkit.data.core.order import normalize_order_by

from anemoi.datasets.dates.groups import Groups

LOG = logging.getLogger(__name__)


Expand Down Expand Up @@ -340,72 +338,20 @@ def _prepare_serialisation(o: Any) -> Any:
return str(o)


def set_to_test_mode(cfg: dict) -> None:
    """Shrink a configuration in place so that it can be run as a quick test.

    The ``dates`` entry is replaced by one covering only the first
    ``NUMBER_OF_DATES`` dates of the original configuration. The rest of the
    configuration is then walked recursively: when ``count_ensembles(cfg)``
    reports more than one ensemble, every ``grid`` entry is set to
    ``"20./20."`` and every list/tuple ``number`` entry is cut to its first
    three items. Independently of the ensemble count, any mapping holding a
    ``constants`` entry whose ``param`` is a list gets that list replaced by
    ``["cos_latitude"]``.

    Parameters
    ----------
    cfg : dict
        The configuration dictionary. It is modified in place.
    """
    NUMBER_OF_DATES = 4

    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
    groups = Groups(**LoadersConfig(cfg).dates)

    all_dates = groups.provider.values
    cfg["dates"] = {
        "start": all_dates[0],
        "end": all_dates[NUMBER_OF_DATES - 1],
        "frequency": groups.provider.frequency,
        "group_by": NUMBER_OF_DATES,
    }

    # Counted once, after the dates have been reduced.
    num_ensembles = count_ensembles(cfg)

    def set_element_to_test(obj):
        # Sequences: simply visit every item.
        if isinstance(obj, (list, tuple)):
            for item in obj:
                set_element_to_test(item)
            return

        # Anything that is not a mapping (scalars, strings, ...) is left as is.
        if not isinstance(obj, (dict, DotDict)):
            return

        if "grid" in obj and num_ensembles > 1:
            previous = obj["grid"]
            obj["grid"] = "20./20."
            LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")

        if "number" in obj and num_ensembles > 1 and isinstance(obj["number"], (list, tuple)):
            previous = obj["number"]
            obj["number"] = previous[0:3]
            LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")

        # Only values are replaced above, never keys, so iterating is safe.
        for child in obj.values():
            set_element_to_test(child)

        if "constants" in obj:
            constants = obj["constants"]
            if "param" in constants and isinstance(constants["param"], list):
                constants["param"] = ["cos_latitude"]

    set_element_to_test(cfg)


def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
def loader_config(config: dict) -> LoadersConfig:
"""Loads and validates the configuration for dataset loaders.

Parameters
----------
config : dict
The configuration dictionary.
is_test : bool, optional
Whether to run in test mode. Defaults to False.

Returns
-------
LoadersConfig
The validated configuration object.
"""
config = Config(config)
if is_test:
set_to_test_mode(config)
obj = LoadersConfig(config)

# yaml round trip to check that serialisation works as expected
Expand All @@ -426,6 +372,9 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
LOG.info(f"Setting env variable {k}={v}")
os.environ[k] = str(v)

# Used by pytest only
# copy.pop('checks', None)

return copy


Expand Down
4 changes: 1 addition & 3 deletions src/anemoi/datasets/create/input/result/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,7 @@ def get_cube(self) -> Any:
LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
except ValueError:
self.explain(ds, order_by, remapping=remapping, patches=patches)
# raise ValueError(f"Error in {self}")
exit(1)
raise ValueError(f"Error in {self}")

if LOG.isEnabledFor(logging.DEBUG):
LOG.debug("Cube shape: %s", cube)
Expand Down Expand Up @@ -497,7 +496,6 @@ def explain(self, ds: Any, *args: Any, remapping: Any, patches: Any) -> None:
print()
print("❌" * 40)
print()
exit(1)

def build_coords(self) -> None:
"""Build the coordinates for the result."""
Expand Down
Loading
Loading