From 5c5f222aa33501195d29397cc2064842c83dcb6f Mon Sep 17 00:00:00 2001 From: Baudouin Raoult Date: Sun, 31 Mar 2024 16:10:33 +0100 Subject: [PATCH] update doc --- docs/building/sources/grib.rst | 7 ++--- docs/using/code/concat1.py | 8 +++++ docs/using/code/join1.py | 8 +++++ docs/using/combining.rst | 53 ++++++++++++++++++++++------------ 4 files changed, 54 insertions(+), 22 deletions(-) diff --git a/docs/building/sources/grib.rst b/docs/building/sources/grib.rst index 8b83ab3e..91a3fa75 100644 --- a/docs/building/sources/grib.rst +++ b/docs/building/sources/grib.rst @@ -22,10 +22,9 @@ wildcards_: You can also use the requested `date` to build the filenames. For example It the GRIB files containing the requested data are named -according to the following pattern: -``/path/to/YYYY/MM/YYYYMMDDHH.grib`` with `YYYY` being the year, `MM` -the month, `DD` the day and `HH` the hour, you can use the following -configuration: +according to the following pattern: ``/path/to/YYYY/MM/YYYYMMDDHH.grib`` +with `YYYY` being the year, `MM` the month, `DD` the day and `HH` the +hour, you can use the following configuration: .. 
literalinclude:: yaml/grib4.yaml :language: yaml diff --git a/docs/using/code/concat1.py b/docs/using/code/concat1.py index ba153551..5a6f2238 100644 --- a/docs/using/code/concat1.py +++ b/docs/using/code/concat1.py @@ -1,3 +1,11 @@ from anemoi.datasets import open_dataset ds = open_dataset("dataset-1979-2000", "dataset-2001-2022") + +# or + +ds = open_dataset(["dataset-1979-2000", "dataset-2001-2022"]) + +# or + +ds = open_dataset(concat=["dataset-1979-2000", "dataset-2001-2022"]) diff --git a/docs/using/code/join1.py b/docs/using/code/join1.py index b10eb394..8da22699 100644 --- a/docs/using/code/join1.py +++ b/docs/using/code/join1.py @@ -1,3 +1,11 @@ from anemoi.datasets import open_dataset ds = open_dataset("dataset1-1979-2022", "dataset2-1979-2022") + +# or + +ds = open_dataset(["dataset1-1979-2022", "dataset2-1979-2022"]) + +# or + +ds = open_dataset(join=["dataset1-1979-2022", "dataset2-1979-2022"]) diff --git a/docs/using/combining.rst b/docs/using/combining.rst index 27fb5c37..74e1cf16 100644 --- a/docs/using/combining.rst +++ b/docs/using/combining.rst @@ -4,10 +4,17 @@ Combining datasets #################### -When combining datasets, the statistics of the first dataset are used by -default. You can change this by setting the :ref:`selecting-statistics` -option to a different dataset, even if it is not part of the -combination. +You can create "virtual" datasets by combining two or more datasets. The +combination will behave exactly as if you had a single dataset with +all the methods behaving as expected. The package will make sure that +the data is loaded lazily from the original datasets. + +.. warning:: + + When combining datasets, the statistics of the first dataset are used + by default. You can change this by setting the + :ref:`selecting-statistics` option to a different dataset, even if it + is not part of the combination. When combining datasets, the package will check that the datasets are compatible, i.e. 
that they have the same resolution, the same variables, @@ -17,11 +24,24 @@ e.g. by changing their date range or frequency using :ref:`start`, :ref:`end`, :ref:`frequency`, etc. You can also ask the package to :ref:`automatically adjust ` these attributes. +********************* + Automatic combining +********************* + +If you just provide a list of datasets, the package will automatically +attempt to combine them: + +- If the datasets have the same variables, ensemble dimension and grids, + and dates that, once concatenated, create a continuous range of dates + with a constant frequency, the package will combine using concat_. + +- If the datasets have the same dates, ensemble dimension and grids, + the package will combine using join_. + .. _concat: -******** - concat -******** +concat +====== You can concatenate two or more datasets along the dates dimension. The package will check that all datasets are compatible (same resolution, @@ -43,9 +63,8 @@ function. .. _join: -****** - join -****** +join +==== You can join two datasets that have the same dates, combining their variables. @@ -69,9 +88,8 @@ Please note that you can join more than two ``zarr`` files. .. _ensembles: -*********** - ensembles -*********** +ensembles +========= You can combine two or more datasets that have the same dates, variables, grids, etc. along the ensemble dimension. The package will @@ -81,22 +99,21 @@ check that all datasets are compatible. .. _grids: -******* - grids -******* +grids +===== .. literalinclude:: code/grids1_.py The values for ``mode`` are: mode=concatenate -================ +---------------- All the grid points are concatenated, in the order they are given. The `latitudes` and `longitudes` are also concatenated. mode=cutout -=========== +----------- The `cutout` mode only supports two datasets. The first dataset is the considered to be a limited area model (LAM), while the second one is