Skip to content

Commit bd92782

Browse files
Adds open_datatree and load_datatree to the tutorial module (#10082)
* added tutorial.open_datatree and tutorial.load_datatree * updated tests to use fixture * added whats-new.rst and api.rst * added suggestions fixed formatting for docs * Added longname of GPM_3IMERGHH_07 --------- Co-authored-by: Deepak Cherian <[email protected]>
1 parent b7ec48a commit bd92782

File tree

4 files changed

+169
-11
lines changed

4 files changed

+169
-11
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,6 +1591,8 @@ Tutorial
15911591

15921592
tutorial.open_dataset
15931593
tutorial.load_dataset
1594+
tutorial.open_datatree
1595+
tutorial.load_datatree
15941596

15951597
Testing
15961598
=======

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ v2025.02.0 (unreleased)
2121

2222
New Features
2323
~~~~~~~~~~~~
24+
- Added :py:func:`tutorial.open_datatree` and :py:func:`tutorial.load_datatree` (:pull:`10082`).
25+
By `Eni Awowale <https://github.com/eni-awowale>`_.
2426
- Added :py:meth:`DataTree.filter_like` to conveniently restructure a DataTree like another DataTree (:issue:`10096`, :pull:`10097`).
2527
By `Kobe Vandelanotte <https://github.com/kobebryant432>`_.
2628
- Added :py:meth:`Coordinates.from_xindex` as convenience for creating a new :py:class:`Coordinates` object

xarray/tests/test_tutorial.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,15 @@
11
from __future__ import annotations
22

3-
import pytest
4-
5-
from xarray import DataArray, tutorial
6-
from xarray.tests import assert_identical, network
3+
from xarray import DataArray, DataTree, tutorial
4+
from xarray.testing import assert_identical
5+
from xarray.tests import network
76

87

98
@network
109
class TestLoadDataset:
11-
@pytest.fixture(autouse=True)
12-
def setUp(self):
13-
self.testfile = "tiny"
14-
1510
def test_download_from_github(self, tmp_path) -> None:
1611
cache_dir = tmp_path / tutorial._default_cache_dir_name
17-
ds = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load()
12+
ds = tutorial.open_dataset("tiny", cache_dir=cache_dir).load()
1813
tiny = DataArray(range(5), name="tiny").to_dataset()
1914
assert_identical(ds, tiny)
2015

@@ -24,7 +19,27 @@ def test_download_from_github_load_without_cache(
2419
cache_dir = tmp_path / tutorial._default_cache_dir_name
2520

2621
ds_nocache = tutorial.open_dataset(
27-
self.testfile, cache=False, cache_dir=cache_dir
22+
"tiny", cache=False, cache_dir=cache_dir
23+
).load()
24+
ds_cache = tutorial.open_dataset("tiny", cache_dir=cache_dir).load()
25+
assert_identical(ds_cache, ds_nocache)
26+
27+
28+
@network
29+
class TestLoadDataTree:
30+
def test_download_from_github(self, tmp_path) -> None:
31+
cache_dir = tmp_path / tutorial._default_cache_dir_name
32+
ds = tutorial.open_datatree("tiny", cache_dir=cache_dir).load()
33+
tiny = DataTree.from_dict({"/": DataArray(range(5), name="tiny").to_dataset()})
34+
assert_identical(ds, tiny)
35+
36+
def test_download_from_github_load_without_cache(
37+
self, tmp_path, monkeypatch
38+
) -> None:
39+
cache_dir = tmp_path / tutorial._default_cache_dir_name
40+
41+
ds_nocache = tutorial.open_datatree(
42+
"tiny", cache=False, cache_dir=cache_dir
2843
).load()
29-
ds_cache = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load()
44+
ds_cache = tutorial.open_datatree("tiny", cache_dir=cache_dir).load()
3045
assert_identical(ds_cache, ds_nocache)

xarray/tutorial.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import numpy as np
1717

1818
from xarray.backends.api import open_dataset as _open_dataset
19+
from xarray.backends.api import open_datatree as _open_datatree
1920
from xarray.core.dataarray import DataArray
2021
from xarray.core.dataset import Dataset
22+
from xarray.core.datatree import DataTree
2123

2224
if TYPE_CHECKING:
2325
from xarray.backends.api import T_Engine
@@ -248,3 +250,140 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset:
248250
ds.B.attrs["units"] = "Bunits"
249251

250252
return ds
253+
254+
255+
def open_datatree(
    name: str,
    cache: bool = True,
    cache_dir: None | str | os.PathLike = None,
    *,
    engine: T_Engine = None,
    **kws,
) -> DataTree:
    """
    Open a dataset as a `DataTree` from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the tree is loaded into memory and the downloaded file
        is deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    engine : optional
        Backend engine forwarded to xarray.open_datatree. When left as None
        and ``name`` ends in ``.grib``, ``"cfgrib"`` is used.
    **kws : dict, optional
        Passed to xarray.open_datatree

    Returns
    -------
    DataTree
        The opened tree (already loaded into memory when ``cache=False``).

    Raises
    ------
    ImportError
        If pooch is not installed, or if a ``.grib`` dataset is requested
        with no explicit engine and cfgrib is not installed.

    See Also
    --------
    tutorial.load_datatree
    open_datatree
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_datatree depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Silence pooch's informational download messages.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # A bare name refers to a netCDF file in the tutorial repository.
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"
                try:
                    import cfgrib  # noqa: F401
                except ImportError as e:
                    raise ImportError(
                        "Reading this tutorial dataset requires the cfgrib package."
                    ) from e

        url = f"{base_url}/raw/{version}/{path.name}"

    headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"}
    downloader = pooch.HTTPDownloader(headers=headers)

    # retrieve the file
    filepath = pooch.retrieve(
        url=url, known_hash=None, path=cache_dir, downloader=downloader
    )
    tree = _open_datatree(filepath, engine=engine, **kws)
    if not cache:
        # Materialize the data first so the tree stays usable once the
        # downloaded file has been removed.
        tree = tree.load()
        pathlib.Path(filepath).unlink()

    return tree
347+
348+
349+
def load_datatree(*args, **kwargs) -> DataTree:
    """
    Open a tutorial dataset as a `DataTree`, load it fully into memory, and
    close the underlying file (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_datatree

    Notes
    -----
    All positional and keyword arguments are forwarded unchanged to
    ``tutorial.open_datatree``.

    See Also
    --------
    tutorial.open_datatree
    open_datatree
    """
    # The context manager closes the tree on exit; the value handed back is
    # whatever ``load()`` returned while the tree was still open.
    with open_datatree(*args, **kwargs) as tree:
        loaded = tree.load()
    return loaded

0 commit comments

Comments
 (0)