Skip to content

Commit 87dd985

Browse files
b8raoult and floriankrb authored
Feature/merge (#126)
* save masks to checkpoint * force np.datetime64 is seconds * Call filters from anemoi-transform * when merging datasets, consider missing dates * add gcd for frequency --------- Co-authored-by: Florian Pinault <[email protected]>
1 parent 87a7b97 commit 87dd985

File tree

5 files changed

+50
-17
lines changed

5 files changed

+50
-17
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@ Keep it human-readable, your future self will thank you!
1010

1111
## [Unreleased](https://github.com/ecmwf/anemoi-datasets/compare/0.5.8...HEAD)
1212

13-
### Changed
13+
14+
### Added
15+
16+
- Call filters from anemoi-transform
1417
- make test optional when adls is not installed Pull request #110
1518

19+
1620
## [0.5.8](https://github.com/ecmwf/anemoi-datasets/compare/0.5.7...0.5.8) - 2024-10-26
1721

1822
### Changed

src/anemoi/datasets/create/functions/__init__.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def assert_is_fieldlist(obj):
2121

2222
def import_function(name, kind):
2323

24+
from anemoi.transform.filters import filter_registry
2425
from anemoi.transforms import Transform as Transform
2526

2627
name = name.replace("-", "_")
@@ -32,8 +33,21 @@ def import_function(name, kind):
3233
if name in plugins:
3334
return plugins[name].load()
3435

35-
module = importlib.import_module(
36-
f".{kind}.{name}",
37-
package=__name__,
38-
)
39-
return module.execute
36+
try:
37+
module = importlib.import_module(
38+
f".{kind}.{name}",
39+
package=__name__,
40+
)
41+
return module.execute
42+
except ModuleNotFoundError:
43+
pass
44+
45+
if kind == "filters":
46+
if filter_registry.lookup(name, return_none=True):
47+
48+
def proc(context, data, *args, **kwargs):
49+
return filter_registry.create(name, *args, **kwargs)(data)
50+
51+
return proc
52+
53+
raise ValueError(f"Unknown {kind} '{name}'")

src/anemoi/datasets/create/functions/filters/rename.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ class RenamedFieldMapping:
2525
def __init__(self, field, what, renaming):
2626
self.field = field
2727
self.what = what
28-
self.renaming = renaming
28+
self.renaming = {}
29+
for k, v in renaming.items():
30+
self.renaming[k] = {str(a): str(b) for a, b in v.items()}
2931

3032
def metadata(self, key=None, **kwargs):
3133
if key is None:

src/anemoi/datasets/data/merge.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,29 @@ def __init__(self, datasets, allow_gaps_in_dates=False):
4040

4141
self.allow_gaps_in_dates = allow_gaps_in_dates
4242

43-
dates = dict()
43+
dates = dict() # date -> (dataset_index, date_index)
4444

4545
for i, d in enumerate(datasets):
4646
for j, date in enumerate(d.dates):
4747
date = date.astype(object)
4848
if date in dates:
49-
d1 = datasets[dates[date][0]]
50-
d2 = datasets[i]
49+
50+
d1 = datasets[dates[date][0]] # Selected
51+
d2 = datasets[i] # The new one
52+
53+
if j in d2.missing:
54+
# LOG.warning(f"Duplicate date {date} found in datasets {d1} and {d2}, but {date} is missing in {d}, ignoring")
55+
continue
56+
57+
k = dates[date][1]
58+
if k in d1.missing:
59+
# LOG.warning(f"Duplicate date {date} found in datasets {d1} and {d2}, but {date} is missing in {d}, ignoring")
60+
dates[date] = (i, j) # Replace the missing date with the new one
61+
continue
62+
5163
raise ValueError(f"Duplicate date {date} found in datasets {d1} and {d2}")
52-
dates[date] = (i, j)
64+
else:
65+
dates[date] = (i, j)
5366

5467
all_dates = sorted(dates)
5568
start = all_dates[0]

src/anemoi/datasets/dates/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
import datetime
1212
import warnings
13+
from functools import reduce
14+
from math import gcd
1315

1416
# from anemoi.utils.dates import as_datetime
1517
from anemoi.utils.dates import DateTimes
@@ -195,18 +197,16 @@ def __init__(self, start, end, steps=[0], years=20, **kwargs):
195197

196198
dates = sorted(dates)
197199

198-
mindelta = None
200+
deltas = set()
199201
for a, b in zip(dates, dates[1:]):
200202
delta = b - a
201203
assert isinstance(delta, datetime.timedelta), delta
202-
if mindelta is None:
203-
mindelta = delta
204-
else:
205-
mindelta = min(mindelta, delta)
204+
deltas.add(delta)
206205

206+
mindelta_seconds = reduce(gcd, [int(delta.total_seconds()) for delta in deltas])
207+
mindelta = datetime.timedelta(seconds=mindelta_seconds)
207208
self.frequency = mindelta
208209
assert mindelta.total_seconds() > 0, mindelta
209-
210210
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
211211

212212
# Use all values between start and end by frequency, and set the ones that are missing

0 commit comments

Comments
(0)