Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:ecmwf/anemoi-datasets into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Nov 14, 2024
2 parents 41243db + bc374e0 commit 10063ea
Show file tree
Hide file tree
Showing 26 changed files with 1,664 additions and 188 deletions.
11 changes: 3 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
- id: python-check-blanket-noqa # Check for # noqa: all
- id: python-no-log-warn # Check for log.warn
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
args: [--line-length=120]
Expand All @@ -40,7 +40,7 @@ repos:
- --force-single-line-imports
- --profile black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.7.2
hooks:
- id: ruff
args:
Expand All @@ -59,13 +59,8 @@ repos:
hooks:
- id: rstfmt
exclude: 'cli/.*' # Because we use argparse
- repo: https://github.com/b8raoult/pre-commit-docconvert
rev: "0.1.5"
hooks:
- id: docconvert
args: ["numpy"]
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "2.2.4"
rev: "v2.5.0"
hooks:
- id: pyproject-fmt

Expand Down
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@ Keep it human-readable, your future self will thank you!

## [Unreleased](https://github.com/ecmwf/anemoi-datasets/compare/0.5.8...HEAD)

### Changed

### Added

- Call filters from anemoi-transform
- Make test optional when adls is not installed (#110)


## [0.5.8](https://github.com/ecmwf/anemoi-datasets/compare/0.5.7...0.5.8) - 2024-10-26

### Changed
Expand All @@ -34,6 +38,7 @@ Keep it human-readable, your future self will thank you!

### Changed

- Upload with ssh (experimental)
- Remove upstream dependencies from downstream-ci workflow (temporary) (#83)
- ci: pin python versions to 3.9 ... 3.12 for checks (#93)
- Fix `__version__` import in init
Expand Down
10 changes: 10 additions & 0 deletions docs/using/missing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
Managing missing dates
########################

*********************************************
Managing missing dates with anemoi-training
*********************************************

Anemoi-training has internal handling of missing dates, and will
calculate the valid date indices used during training using the
``missing`` property. Consequently, when training a model with
anemoi-training, you should *not* specify a method to deal with missing
dates in the dataloader configuration file.

**************************************************
Filling the missing dates with artificial values
**************************************************
Expand Down
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
#!/usr/bin/env python
# (C) Copyright 2024 ECMWF.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/

[build-system]
requires = [
"setuptools>=60",
Expand Down Expand Up @@ -42,6 +40,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
Expand Down
101 changes: 34 additions & 67 deletions src/anemoi/datasets/commands/copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@

import logging
import os
import shutil
import sys
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed

import tqdm
from anemoi.utils.s3 import download
from anemoi.utils.s3 import upload
from anemoi.utils.remote import Transfer
from anemoi.utils.remote import TransferMethodNotImplementedError

from . import Command

Expand All @@ -29,54 +28,7 @@
isatty = False


class S3Downloader:
def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
self.source = source
self.target = target
self.transfers = transfers
self.overwrite = overwrite
self.resume = resume
self.verbosity = verbosity

def run(self):
if self.target == ".":
self.target = os.path.basename(self.source)

if self.overwrite and os.path.exists(self.target):
LOG.info(f"Deleting {self.target}")
shutil.rmtree(self.target)

download(
self.source + "/" if not self.source.endswith("/") else self.source,
self.target,
overwrite=self.overwrite,
resume=self.resume,
verbosity=self.verbosity,
threads=self.transfers,
)


class S3Uploader:
def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
self.source = source
self.target = target
self.transfers = transfers
self.overwrite = overwrite
self.resume = resume
self.verbosity = verbosity

def run(self):
upload(
self.source,
self.target,
overwrite=self.overwrite,
resume=self.resume,
verbosity=self.verbosity,
threads=self.transfers,
)


class DefaultCopier:
class ZarrCopier:
def __init__(self, source, target, transfers, block_size, overwrite, resume, verbosity, nested, rechunk, **kwargs):
self.source = source
self.target = target
Expand All @@ -90,6 +42,14 @@ def __init__(self, source, target, transfers, block_size, overwrite, resume, ver

self.rechunking = rechunk.split(",") if rechunk else []

source_is_ssh = self.source.startswith("ssh://")
target_is_ssh = self.target.startswith("ssh://")

if source_is_ssh or target_is_ssh:
if self.rechunk:
raise NotImplementedError("Rechunking with SSH not implemented.")
assert NotImplementedError("SSH not implemented.")

def _store(self, path, nested=False):
if nested:
import zarr
Expand Down Expand Up @@ -337,26 +297,33 @@ def run(self, args):
if args.source == args.target:
raise ValueError("Source and target are the same.")

kwargs = vars(args)

if args.overwrite and args.resume:
raise ValueError("Cannot use --overwrite and --resume together.")

source_in_s3 = args.source.startswith("s3://")
target_in_s3 = args.target.startswith("s3://")

copier = None

if args.rechunk or (source_in_s3 and target_in_s3):
copier = DefaultCopier(**kwargs)
else:
if source_in_s3:
copier = S3Downloader(**kwargs)

if target_in_s3:
copier = S3Uploader(**kwargs)

if not args.rechunk:
# rechunking is only supported for ZARR datasets, it is implemented in this package
try:
if args.source.startswith("s3://") and not args.source.endswith("/"):
args.source = args.source + "/"
copier = Transfer(
args.source,
args.target,
overwrite=args.overwrite,
resume=args.resume,
verbosity=args.verbosity,
threads=args.transfers,
)
copier.run()
return
except TransferMethodNotImplementedError:
# DataTransfer relies on anemoi-utils which is agnostic to the source and target format
# it transfers file and folders, ignoring that it is zarr data
# if it is not implemented, we fallback to the ZarrCopier
pass

copier = ZarrCopier(**vars(args))
copier.run()
return


class Copy(CopyMixin, Command):
Expand Down
2 changes: 1 addition & 1 deletion src/anemoi/datasets/create/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def check_parsed(self):
self.messages.append(
f"the dataset name {self} does not follow naming convention. "
"See here for details: "
"https://confluence.ecmwf.int/display/DWF/Datasets+available+as+zarr"
"https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html"
)

def check_resolution(self, resolution):
Expand Down
5 changes: 2 additions & 3 deletions src/anemoi/datasets/create/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,14 @@ def _prepare_serialisation(o):
def set_to_test_mode(cfg):
NUMBER_OF_DATES = 4

dates = cfg["dates"]
LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
groups = Groups(**LoadersConfig(cfg).dates)

dates = groups.dates
dates = groups.provider.values
cfg["dates"] = dict(
start=dates[0],
end=dates[NUMBER_OF_DATES - 1],
frequency=dates.frequency,
frequency=groups.provider.frequency,
group_by=NUMBER_OF_DATES,
)

Expand Down
25 changes: 20 additions & 5 deletions src/anemoi/datasets/create/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def assert_is_fieldlist(obj):

def import_function(name, kind):

from anemoi.transform.filters import filter_registry

name = name.replace("-", "_")

plugins = {}
Expand All @@ -30,8 +32,21 @@ def import_function(name, kind):
if name in plugins:
return plugins[name].load()

module = importlib.import_module(
f".{kind}.{name}",
package=__name__,
)
return module.execute
try:
module = importlib.import_module(
f".{kind}.{name}",
package=__name__,
)
return module.execute
except ModuleNotFoundError:
pass

if kind == "filters":
if filter_registry.lookup(name, return_none=True):

def proc(context, data, *args, **kwargs):
return filter_registry.create(name, *args, **kwargs)(data)

return proc

raise ValueError(f"Unknown {kind} '{name}'")
6 changes: 0 additions & 6 deletions src/anemoi/datasets/create/functions/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,3 @@
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#
4 changes: 3 additions & 1 deletion src/anemoi/datasets/create/functions/filters/rename.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ class RenamedFieldMapping:
def __init__(self, field, what, renaming):
self.field = field
self.what = what
self.renaming = renaming
self.renaming = {}
for k, v in renaming.items():
self.renaming[k] = {str(a): str(b) for a, b in v.items()}

def metadata(self, key=None, **kwargs):
if key is None:
Expand Down
Loading

0 comments on commit 10063ea

Please sign in to comment.