Skip to content

Commit e1303ad

Browse files
authored
Feature/new datasets (#99)
* main changes * bugfix * few bugs and add unit tests * work with more planetary computer ds * add optional dependencies
1 parent e5ec079 commit e1303ad

File tree

7 files changed

+122
-1
lines changed

7 files changed

+122
-1
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ optional-dependencies.tests = [
9797
optional-dependencies.xarray = [
9898
"gcsfs",
9999
"kerchunk",
100+
"pandas",
101+
"pystac_client",
102+
"planetary_computer",
100103
]
101104

102105
urls.Documentation = "https://anemoi-datasets.readthedocs.io/"

src/anemoi/datasets/create/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# nor does it submit to any jurisdiction.
88
#
99

10+
import cftime
1011
import datetime
1112
import json
1213
import logging
@@ -66,6 +67,19 @@ def json_tidy(o):
6667
if isinstance(o, datetime.timedelta):
6768
return frequency_to_string(o)
6869

70+
if isinstance(o, cftime.DatetimeJulian):
71+
import pandas as pd
72+
73+
o = pd.Timestamp(
74+
o.year,
75+
o.month,
76+
o.day,
77+
o.hour,
78+
o.minute,
79+
o.second,
80+
)
81+
return o.isoformat()
82+
6983
raise TypeError(repr(o) + " is not JSON serializable")
7084

7185

src/anemoi/datasets/create/functions/sources/xarray/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs)
4646

4747
if isinstance(dataset, str) and ".zarr" in dataset:
4848
data = xr.open_zarr(name_to_zarr_store(dataset), **options)
49+
elif "planetarycomputer" in dataset:
50+
store = name_to_zarr_store(dataset)
51+
if "store" in store:
52+
data = xr.open_zarr(**store)
53+
if "filename_or_obj" in store:
54+
data = xr.open_dataset(**store)
4955
else:
5056
data = xr.open_dataset(dataset, **options)
5157

src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ def from_xarray(cls, ds, flavour=None):
6060
flavour = yaml.safe_load(f)
6161
else:
6262
flavour = json.load(f)
63+
64+
if isinstance(flavour, dict):
65+
flavour_coords = [coords["name"] for coords in flavour["rules"].values()]
66+
ds_dims = [dim for dim in ds._dims]
67+
for dim in ds_dims:
68+
if dim in flavour_coords and dim not in ds._coord_names:
69+
ds = ds.assign_coords({dim:ds[dim]})
70+
else:
71+
pass
6372

6473
guess = CoordinateGuesser.from_flavour(ds, flavour)
6574

src/anemoi/datasets/create/functions/sources/xarray/variable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def sel(self, missing, **kwargs):
117117

118118
variable = Variable(
119119
ds=self.ds,
120-
var=self.variable.isel({k: i}),
120+
variable=self.variable.isel({k: i}),
121121
coordinates=coordinates,
122122
grid=self.grid,
123123
time=self.time,

src/anemoi/datasets/data/stores.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,41 @@ def __getitem__(self, key):
8585
return response["Body"].read()
8686

8787

88+
class PlanetaryComputerStore(ReadOnlyStore):
    """Resolve a Planetary Computer collection into xarray open arguments.

    We write our own store to access catalogs on Planetary Computer, as
    they require some extra arguments to be passed to ``xr.open_zarr`` /
    ``xr.open_dataset``.

    NOTE(review): unlike a regular mapping-style store, ``__getitem__`` takes
    no key here; it is called explicitly and returns a dict of keyword
    arguments rather than bytes.
    """

    def __init__(self, data_catalog_id):
        """Remember the identifier of the STAC collection to open.

        Parameters
        ----------
        data_catalog_id : str
            Collection identifier passed to ``catalog.get_collection``.
        """
        self.data_catalog_id = data_catalog_id

    def __getitem__(self):
        """Build the keyword arguments needed to open the collection's Zarr asset.

        Returns
        -------
        dict
            Either ``{"store": href, "storage_options": ..., **open_kwargs}``
            when the asset declares ``xarray:storage_options``, or
            ``{"filename_or_obj": href, **open_kwargs}`` otherwise.

        Raises
        ------
        KeyError
            If the collection has no ``"zarr-abfs"`` asset.
        """
        # Imported here so that this module can be imported without these
        # packages being installed.
        import pystac_client
        import planetary_computer

        catalog = pystac_client.Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1/",
            modifier=planetary_computer.sign_inplace,
        )
        collection = catalog.get_collection(self.data_catalog_id)

        try:
            asset = collection.assets["zarr-abfs"]
        except KeyError:
            # Same exception type as before, but say which collection failed.
            raise KeyError(
                f"Collection {self.data_catalog_id!r} has no 'zarr-abfs' asset"
            ) from None

        extra_fields = asset.extra_fields
        # An asset may omit the open kwargs; treat that as "no extra kwargs"
        # instead of failing with a bare KeyError.
        open_kwargs = extra_fields.get("xarray:open_kwargs") or {}

        if "xarray:storage_options" in extra_fields:
            # open_kwargs is unpacked last so that it keeps precedence over
            # the keys set explicitly, as in the original ordering.
            return {
                "store": asset.href,
                "storage_options": extra_fields["xarray:storage_options"],
                **open_kwargs,
            }

        return {
            "filename_or_obj": asset.href,
            **open_kwargs,
        }
121+
122+
88123
class DebugStore(ReadOnlyStore):
89124
"""A store to debug the zarr loading."""
90125

@@ -121,6 +156,9 @@ def name_to_zarr_store(path_or_url):
121156
if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
122157
s3_url = f"s3://{bits[0]}{parsed.path}"
123158
store = S3Store(s3_url, region=bits[2])
159+
elif store.startswith("https://planetarycomputer.microsoft.com/"):
160+
data_catalog_id = store.rsplit('/', 1)[-1]
161+
store = PlanetaryComputerStore(data_catalog_id).__getitem__()
124162
else:
125163
store = HTTPStore(store)
126164

tests/xarray/test_zarr.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from anemoi.datasets.create.functions.sources.xarray import XarrayFieldList
1414
from anemoi.datasets.testing import assert_field_list
15+
from anemoi.datasets.data.stores import name_to_zarr_store
1516

1617

1718
def test_arco_era5_1():
@@ -91,6 +92,56 @@ def test_inca_one_date():
9192
print(fs[0].datetime())
9293

9394

95+
def test_noaa_replay():
    """Open the NOAA UFS replay Zarr store anonymously and check its field list."""
    store_url = "gs://noaa-ufs-gefsv13replay/ufs-hr1/1.00-degree/03h-freq/zarr/fv3.zarr"
    dataset = xr.open_zarr(store_url, storage_options={"token": "anon"})

    # Flavour rules: each entry names the dataset variable to use for that key.
    rule_names = (
        ("latitude", "grid_yt"),
        ("longitude", "grid_xt"),
        ("time", "time"),
        ("level", "pfull"),
    )
    flavour = {
        "rules": {key: {"name": name} for key, name in rule_names},
        "levtype": "pl",
    }

    fields = XarrayFieldList.from_xarray(dataset, flavour)

    # Expected values passed to assert_field_list; presumably the field count
    # and the first/last dates -- confirm against assert_field_list.
    expected = (36956954, "1993-12-31T18:00:00", "1999-06-13T03:00:00")
    assert_field_list(fields, *expected)
119+
120+
121+
def test_planetary_computer_conus404():
    """Open the CONUS404 collection from Planetary Computer and check its field list."""
    url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/conus404"
    # name_to_zarr_store returns keyword arguments for this URL, hence the unpacking.
    open_arguments = name_to_zarr_store(url)
    dataset = xr.open_zarr(**open_arguments)

    # Flavour rules: each entry names the dataset variable to use for that key.
    rule_names = (
        ("latitude", "lat"),
        ("longitude", "lon"),
        ("x", "west_east"),
        ("y", "south_north"),
        ("time", "time"),
    )
    flavour = {"rules": {key: {"name": name} for key, name in rule_names}}

    fields = XarrayFieldList.from_xarray(dataset, flavour)

    # Expected values passed to assert_field_list; presumably the field count
    # and the first/last dates -- confirm against assert_field_list.
    expected = (74634912, "1979-10-01T00:00:00", "2022-09-30T23:00:00")
    assert_field_list(fields, *expected)
143+
144+
94145
if __name__ == "__main__":
95146
# test_arco_era5_2()
96147
# exit()

0 commit comments

Comments
 (0)