|
16 | 16 | import numpy as np
|
17 | 17 |
|
18 | 18 | from xarray.backends.api import open_dataset as _open_dataset
|
| 19 | +from xarray.backends.api import open_datatree as _open_datatree |
19 | 20 | from xarray.core.dataarray import DataArray
|
20 | 21 | from xarray.core.dataset import Dataset
|
| 22 | +from xarray.core.datatree import DataTree |
21 | 23 |
|
22 | 24 | if TYPE_CHECKING:
|
23 | 25 | from xarray.backends.api import T_Engine
|
@@ -248,3 +250,140 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset:
|
248 | 250 | ds.B.attrs["units"] = "Bunits"
|
249 | 251 |
|
250 | 252 | return ds
|
| 253 | + |
| 254 | + |
def open_datatree(
    name: str,
    cache: bool = True,
    cache_dir: None | str | os.PathLike = None,
    *,
    engine: T_Engine = None,
    **kws,
) -> DataTree:
    """
    Open a dataset as a `DataTree` from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the data is loaded into memory and the downloaded file
        is deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    engine : str, optional
        Backend engine forwarded to xarray.open_datatree. When omitted and
        ``name`` ends in ``.grib``, ``"cfgrib"`` is used.
    **kws : dict, optional
        Passed to xarray.open_datatree

    Returns
    -------
    DataTree
        The opened tree (fully loaded in memory when ``cache`` is False).

    Raises
    ------
    ImportError
        If pooch is not installed, or if a ``.grib`` dataset is requested
        without an explicit engine and cfgrib is not installed.

    See Also
    --------
    tutorial.load_datatree
    open_datatree
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_datatree depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Only surface warnings and above from pooch's own logger.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # A bare name (no extension) refers to a netCDF file in the repository.
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"
                try:
                    import cfgrib  # noqa: F401
                except ImportError as e:
                    raise ImportError(
                        "Reading this tutorial dataset requires the cfgrib package."
                    ) from e

        url = f"{base_url}/raw/{version}/{path.name}"

    headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"}
    downloader = pooch.HTTPDownloader(headers=headers)

    # retrieve the file
    filepath = pooch.retrieve(
        url=url, known_hash=None, path=cache_dir, downloader=downloader
    )
    tree = _open_datatree(filepath, engine=engine, **kws)
    if not cache:
        # Pull everything into memory and release the file handle before
        # deleting the download; removing a file that is still open fails
        # on some platforms (e.g. Windows).
        tree = tree.load()
        tree.close()
        pathlib.Path(filepath).unlink()

    return tree
| 347 | + |
| 348 | + |
def load_datatree(*args, **kwargs) -> DataTree:
    """
    Open, load into memory (as a `DataTree`), and close a dataset from the online repository
    (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``tutorial.open_datatree``, i.e.:

        name : str
            Name of the file containing the dataset.
            e.g. 'air_temperature'
        cache : bool, optional
            If True, then cache data locally for use on subsequent calls
        cache_dir : path-like, optional
            The directory in which to search for and write cached data.
        **kws : dict, optional
            Passed to xarray.open_datatree

    Returns
    -------
    DataTree
        The result of calling ``.load()`` on the opened tree.

    See Also
    --------
    tutorial.open_datatree
    open_datatree
    """
    # The context manager closes the opened tree once loading has finished.
    with open_datatree(*args, **kwargs) as tree:
        loaded = tree.load()
        return loaded
0 commit comments