
Commit 0a5fa5e

Merge branch 'develop' of github.com:ecmwf/anemoi-datasets into develop
2 parents: 7c23b07 + 904e102 (commit 0a5fa5e)


18 files changed: +130 −92 lines


.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
```diff
@@ -35,8 +35,9 @@ on:
 jobs:
   # Run CI including downstream packages on self-hosted runners
   downstream-ci:
+
     name: downstream-ci
-    if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+    if: ${{ !contains(github.repository, 'private') && (!github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci') }}
     uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci.yml@main
     with:
       anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }}
@@ -46,7 +47,7 @@ jobs:
   # Build downstream packages on HPC
   downstream-ci-hpc:
     name: downstream-ci-hpc
-    if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+    if: ${{ !contains(github.repository, 'private') && (!github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci') }}
     uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main
     with:
       anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }}
```
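Both `if:` guards now wrap the previous fork/label condition in parentheses and prepend a repository-name check, so these workflows are skipped in a `-private` mirror repository. Below is a minimal restatement of that boolean logic in Python, purely for illustration; the function, argument names, and example repository names are not part of the workflow:

```python
def should_run(repository: str, is_fork_pr: bool, action: str, label: str) -> bool:
    """Illustrative restatement of the workflow's `if:` expression."""
    not_private = "private" not in repository
    # The pre-existing condition; the new parentheses keep its `or`
    # from short-circuiting past the repository-name check.
    previous = (not is_fork_pr and action != "labeled") or label == "approved-for-ci"
    return not_private and previous


# Example: runs in the public repository, skipped in the private mirror.
assert should_run("ecmwf/anemoi-datasets", False, "opened", "") is True
assert should_run("ecmwf/anemoi-datasets-private", False, "opened", "") is False
```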

.github/workflows/push-to-private.yml

Lines changed: 12 additions & 6 deletions
```diff
@@ -1,4 +1,4 @@
-name: Push to another repository
+name: Push to private repository
 
 on:
   push:
@@ -7,21 +7,27 @@ on:
 
 jobs:
   push_changes:
+    if: ${{ !contains(github.repository, 'private') }}
     runs-on: ubuntu-latest
 
     steps:
       - name: Checkout source repository
         uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          fetch-tags: true
 
       - name: Set up Git configuration
         run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
 
+      - name: Setup SSH key
+        uses: webfactory/[email protected]
+        with:
+          ssh-private-key: ${{ secrets.KEY_TO_PRIVATE }}
+
       - name: Push changes to private repository
-        # env:
-        #   MLX_TOKEN: ${{ secrets.MLX_TOKEN }}
         run: |
-          git remote add private https://${{ secrets.MLX_TOKEN }}@github.com/ecmwf-lab/anemoi-datasets-private.git
-          git fetch private
-          git push private develop
+          git remote add private git@github.com:${{ github.repository }}-private.git
+          git push --set-upstream private develop
```

.github/workflows/python-publish.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,6 +9,7 @@ on:
 
 jobs:
   quality:
+    if: ${{ !contains(github.repository, 'private') }}
     uses: ecmwf-actions/reusable-workflows/.github/workflows/qa-precommit-run.yml@v2
     with:
       skip-hooks: "no-commit-to-branch"
```

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
```diff
@@ -5,12 +5,12 @@ repos:
       - id: clear-notebooks-output
         name: clear-notebooks-output
         files: tools/.*\.ipynb$
-        stages: [commit]
+        stages: [pre-commit]
         language: python
         entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace
         additional_dependencies: [jupyter]
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-yaml # Check YAML files for syntax errors only
         args: [--unsafe, --allow-multiple-documents]
@@ -40,7 +40,7 @@ repos:
           - --force-single-line-imports
           - --profile black
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.4
+    rev: v0.6.9
     hooks:
       - id: ruff
         # Next line if for documenation cod snippets
@@ -66,7 +66,7 @@ repos:
       - id: docconvert
         args: ["numpy"]
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "2.2.3"
+    rev: "2.2.4"
     hooks:
       - id: pyproject-fmt
```

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
```diff
@@ -15,6 +15,13 @@ Keep it human-readable, your future self will thank you!
 - Update documentation
 
 - Update documentation
+### Changed
+
+- Add `variables_metadata` entry in the dataset metadata
+
+### Changed
+
+- Add `variables_metadata` entry in the dataset metadata
 
 ## [0.5.5](https://github.com/ecmwf/anemoi-datasets/compare/0.5.4...0.5.5) - 2024-10-04
 
@@ -55,6 +62,7 @@ Keep it human-readable, your future self will thank you!
 - Bug fix when creating dataset from zarr
 - Bug fix with area selection in cutout operation
 - add paths-ignore to ci workflow
+- call provenance less often
 
 ### Removed
```

src/anemoi/datasets/commands/inspect.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -506,6 +506,10 @@ def name_to_index(self):
     def variables(self):
         return self.metadata["variables"]
 
+    @property
+    def variables_metadata(self):
+        return self.metadata.get("variables_metadata", {})
+
 
 class Version0_12(Version0_6):
     def details(self):
```
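The new property exposes the per-variable metadata stored in the dataset, with an empty mapping as the fallback for datasets written before this change. A small sketch of how such a mapping might be read; the metadata dictionary and its values below are made up for illustration and do not come from a real dataset:

```python
# Stand-in for the metadata dictionary read by the inspect command.
metadata = {
    "variables": ["2t", "10u"],
    "variables_metadata": {
        "2t": {"param": "2t", "levtype": "sfc"},    # illustrative values
        "10u": {"param": "10u", "levtype": "sfc"},  # illustrative values
    },
}

# Mirrors the new property: older datasets without the entry yield {}.
variables_metadata = metadata.get("variables_metadata", {})

for name, md in variables_metadata.items():
    print(f"{name}: param={md.get('param')}, levtype={md.get('levtype')}")
```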

src/anemoi/datasets/create/__init__.py

Lines changed: 4 additions & 39 deletions
```diff
@@ -25,6 +25,7 @@
 from anemoi.utils.dates import frequency_to_timedelta
 from anemoi.utils.humanize import compress_dates
 from anemoi.utils.humanize import seconds_to_human
+from anemoi.utils.sanitise import sanitise
 from earthkit.data.core.order import build_remapping
 
 from anemoi.datasets import MissingDateError
@@ -52,7 +53,7 @@
 
 LOG = logging.getLogger(__name__)
 
-VERSION = "0.20"
+VERSION = "0.30"
 
 
 def json_tidy(o):
@@ -325,43 +326,6 @@ def build_input_(main_config, output_config):
     return builder
 
 
-def tidy_recipe(config: object):
-    """Remove potentially private information in the config"""
-    config = deepcopy(config)
-    if isinstance(config, (tuple, list)):
-        return [tidy_recipe(_) for _ in config]
-    if isinstance(config, (dict, DotDict)):
-        for k, v in config.items():
-            if k.startswith("_"):
-                config[k] = "*** REMOVED FOR SECURITY ***"
-            else:
-                config[k] = tidy_recipe(v)
-    if isinstance(config, str):
-        if config.startswith("_"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("s3://"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("gs://"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("http"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("ftp"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("file"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("ssh"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("scp"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("rsync"):
-            return "*** REMOVED FOR SECURITY ***"
-        if config.startswith("/"):
-            return "*** REMOVED FOR SECURITY ***"
-        if "@" in config:
-            return "*** REMOVED FOR SECURITY ***"
-    return config
-
-
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
@@ -448,7 +412,7 @@ def _run(self):
         metadata.update(self.main_config.get("add_metadata", {}))
 
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
-        metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
+        metadata["recipe"] = sanitise(self.main_config.get_serialisable_dict())
 
         metadata["description"] = self.main_config.description
         metadata["licence"] = self.main_config["licence"]
@@ -467,6 +431,7 @@ def _run(self):
         metadata["data_request"] = self.minimal_input.data_request
         metadata["field_shape"] = self.minimal_input.field_shape
         metadata["proj_string"] = self.minimal_input.proj_string
+        metadata["variables_metadata"] = self.minimal_input.variables_metadata
 
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
```

src/anemoi/datasets/create/input/result.py

Lines changed: 37 additions & 1 deletion
```diff
@@ -30,6 +30,31 @@
 LOG = logging.getLogger(__name__)
 
 
+def _fields_metatata(variables, cube):
+    assert isinstance(variables, tuple), variables
+
+    result = {}
+    for i, c in enumerate(cube.iterate_cubelets()):
+        assert c._coords_names[1] == variables[i], (c._coords_names[1], variables[i])
+        f = cube[c.coords]
+        md = f.metadata(namespace="mars")
+        if not md:
+            md = f.metadata(namespace="default")
+
+        if md.get("param") == "~":
+            md["param"] = f.metadata("param")
+            assert md["param"] not in ("~", "unknown"), (md, f.metadata("param"))
+
+        if md.get("param") == "unknown":
+            md["param"] = str(f.metadata("paramId", default="unknown"))
+            # assert md['param'] != 'unknown', (md, f.metadata('param'))
+
+        result[variables[i]] = md
+
+    assert i + 1 == len(variables), (i + 1, len(variables))
+    return result
+
+
 def _data_request(data):
     date = None
     params_levels = defaultdict(set)
@@ -312,7 +337,10 @@ def _trace_datasource(self, *args, **kwargs):
     def build_coords(self):
         if self._coords_already_built:
             return
-        from_data = self.get_cube().user_coords
+
+        cube = self.get_cube()
+
+        from_data = cube.user_coords
         from_config = self.context.order_by
 
         keys_from_config = list(from_config.keys())
@@ -359,11 +387,19 @@ def build_coords(self):
         self._field_shape = first_field.shape
         self._proj_string = first_field.proj_string if hasattr(first_field, "proj_string") else None
 
+        self._cube = cube
+
+        self._coords_already_built = True
+
     @property
     def variables(self):
         self.build_coords()
         return self._variables
 
+    @property
+    def variables_metadata(self):
+        return _fields_metatata(self.variables, self._cube)
+
     @property
     def ensembles(self):
         self.build_coords()
```
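Within `_fields_metatata`, the name recorded for each field comes from the MARS namespace when possible, with two fallbacks. A stripped-down restatement of that fallback chain, independent of earthkit-data; the dictionaries stand in for a field's metadata and the values are illustrative only:

```python
def resolve_param(mars_md: dict, field_md: dict) -> str:
    """Illustrative restatement of the param fallback used by _fields_metatata."""
    param = mars_md.get("param")
    if param == "~":
        # The MARS namespace has no usable name: use the field's own "param".
        param = field_md.get("param")
    if param == "unknown":
        # Last resort: the numeric GRIB paramId, stored as a string.
        param = str(field_md.get("paramId", "unknown"))
    return param


# Example with made-up values: a field whose MARS name is missing.
print(resolve_param({"param": "~"}, {"param": "2t", "paramId": 167}))  # -> 2t
```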

src/anemoi/datasets/create/persistent.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -56,8 +56,11 @@ def items(self):
                 yield pickle.load(f)
 
     def add_provenance(self, **kwargs):
+        path = os.path.join(self.dirname, "provenance.json")
+        if os.path.exists(path):
+            return
         out = dict(provenance=gather_provenance_info(), **kwargs)
-        with open(os.path.join(self.dirname, "provenance.json"), "w") as f:
+        with open(path, "w") as f:
             json.dump(out, f)
 
     def add(self, elt, *, key):
```

src/anemoi/datasets/create/statistics/__init__.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -187,8 +187,11 @@ def __init__(self, dirname, overwrite=False):
 
     def add_provenance(self, **kwargs):
         self.create(exist_ok=True)
+        path = os.path.join(self.dirname, "provenance.json")
+        if os.path.exists(path):
+            return
         out = dict(provenance=gather_provenance_info(), **kwargs)
-        with open(os.path.join(self.dirname, "provenance.json"), "w") as f:
+        with open(path, "w") as f:
             json.dump(out, f)
 
     def create(self, exist_ok):
```
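Both temporary-store writers now guard `add_provenance` so the provenance file is written only once; this is what the CHANGELOG entry "call provenance less often" refers to, since `gather_provenance_info()` is skipped entirely on subsequent calls. A self-contained sketch of the same write-once pattern; the directory and payload are placeholders rather than the real provenance data:

```python
import json
import os
import tempfile


def write_provenance_once(dirname: str, **kwargs) -> None:
    """Write provenance.json only if it does not already exist."""
    path = os.path.join(dirname, "provenance.json")
    if os.path.exists(path):
        return  # already recorded; skip the relatively costly gathering step
    out = dict(provenance={"example": "placeholder"}, **kwargs)  # stands in for gather_provenance_info()
    with open(path, "w") as f:
        json.dump(out, f)


# Calling twice writes the file once; the second call returns immediately.
workdir = tempfile.mkdtemp()
write_provenance_once(workdir, config="example")
write_provenance_once(workdir, config="example")
```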
