From 9deadb73064e6a11999bed0ae53d548206934cf9 Mon Sep 17 00:00:00 2001
From: dcherian
Date: Mon, 1 Aug 2022 20:48:06 -0600
Subject: [PATCH 01/30] Add Kvikio backend entrypoint

---
 cupy_xarray/kvikio.py | 218 ++++++++++++++++++++++++++++++++++++++++++
 setup.py              |   3 +
 2 files changed, 221 insertions(+)
 create mode 100644 cupy_xarray/kvikio.py

diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py
new file mode 100644
index 0000000..313a249
--- /dev/null
+++ b/cupy_xarray/kvikio.py
@@ -0,0 +1,218 @@
+import os
+import warnings
+
+import cupy as cp
+import numpy as np
+import zarr
+from xarray import Variable
+from xarray.backends import zarr as zarr_backend
+from xarray.backends.common import _normalize_path  # TODO: can this be public
+from xarray.backends.store import StoreBackendEntrypoint
+from xarray.backends.zarr import ZarrArrayWrapper, ZarrBackendEntrypoint, ZarrStore
+from xarray.core import indexing
+from xarray.core.utils import close_on_error  # TODO: can this be public.
+
+try:
+    import kvikio.zarr
+
+    has_kvikio = True
+except ImportError:
+    has_kvikio = False
+
+
+class CupyZarrArrayWrapper(ZarrArrayWrapper):
+    """Wrapper used for variables that are not dimension coordinates."""
+
+    def __array__(self):
+        return self.get_array()
+
+
+class EagerCupyZarrArrayWrapper(ZarrArrayWrapper):
+    """Used to wrap dimension coordinates."""
+
+    def __array__(self):
+        # Read the whole array on device, then ``.get()`` copies it to host.
+        return self.datastore.zarr_group[self.variable_name][:].get()
+
+    def get_array(self):
+        return np.asarray(self)
+
+
+class GDSZarrStore(ZarrStore):
+    """``ZarrStore`` that reads its data through ``kvikio.zarr.GDSStore``."""
+
+    @classmethod
+    def open_group(
+        cls,
+        store,
+        mode="r",
+        synchronizer=None,
+        group=None,
+        consolidated=False,
+        consolidate_on_close=False,
+        chunk_store=None,
+        storage_options=None,
+        append_dim=None,
+        write_region=None,
+        safe_chunks=True,
+        stacklevel=2,
+    ):
+        """Open ``store`` as a zarr group read through a ``GDSStore``.
+
+        The store is wrapped in ``kvikio.zarr.GDSStore`` and zarr is opened with
+        ``meta_array=cp.empty(())``.  ``consolidated=True`` is not supported yet
+        and fails the assertion below.  ``consolidated=None`` without a
+        ``chunk_store`` tries consolidated metadata first and, on ``KeyError``,
+        emits a ``RuntimeWarning`` and falls back to ``zarr.open_group``.
+        """
+
+        # zarr doesn't support pathlib.Path objects yet. zarr-python#601
+        if isinstance(store, os.PathLike):
+            store = os.fspath(store)
+
+        open_kwargs = dict(
+            mode=mode,
+            synchronizer=synchronizer,
+            path=group,
+            ########## NEW STUFF
+            meta_array=cp.empty(()),
+        )
+        open_kwargs["storage_options"] = storage_options
+
+        # TODO: handle consolidated
+        assert not consolidated
+
+        if chunk_store:
+            open_kwargs["chunk_store"] = chunk_store
+            if consolidated is None:
+                consolidated = False
+
+        store = kvikio.zarr.GDSStore(store)
+
+        if consolidated is None:
+            try:
+                zarr_group = zarr.open_consolidated(store, **open_kwargs)
+            except KeyError:
+                warnings.warn(
+                    "Failed to open Zarr store with consolidated metadata, "
+                    "falling back to try reading non-consolidated metadata. "
+                    "This is typically much slower for opening a dataset. "
+                    "To silence this warning, consider:\n"
+                    "1. Consolidating metadata in this existing store with "
+                    "zarr.consolidate_metadata().\n"
+                    "2. Explicitly setting consolidated=False, to avoid trying "
+                    "to read consolidate metadata, or\n"
+                    "3. Explicitly setting consolidated=True, to raise an "
+                    "error in this case instead of falling back to try "
+                    "reading non-consolidated metadata.",
+                    RuntimeWarning,
+                    stacklevel=stacklevel,
+                )
+                zarr_group = zarr.open_group(store, **open_kwargs)
+        elif consolidated:
+            # TODO: an option to pass the metadata_key keyword
+            zarr_group = zarr.open_consolidated(store, **open_kwargs)
+        else:
+            zarr_group = zarr.open_group(store, **open_kwargs)
+
+        return cls(
+            zarr_group,
+            mode,
+            consolidate_on_close,
+            append_dim,
+            write_region,
+            safe_chunks,
+        )
+
+    def open_store_variable(self, name, zarr_array):
+        """Build the xarray ``Variable`` for zarr array ``name`` in this store."""
+
+        try_nczarr = self._mode == "r"
+        dimensions, attributes = zarr_backend._get_zarr_dims_and_attrs(
+            zarr_array, zarr_backend.DIMENSION_KEY, try_nczarr
+        )
+
+        #### Changed from zarr array wrapper
+        if name in dimensions:
+            # we want indexed dimensions to be loaded eagerly
+            # Right now we load in to device and then transfer to host
+            # But these should be small-ish arrays
+            # TODO: can we tell GDSStore to load as numpy array directly
+            # not cupy array?
+            array_wrapper = EagerCupyZarrArrayWrapper
+        else:
+            array_wrapper = CupyZarrArrayWrapper
+        data = indexing.LazilyIndexedArray(array_wrapper(name, self))
+
+        attributes = dict(attributes)
+        encoding = {
+            "chunks": zarr_array.chunks,
+            "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
+            "compressor": zarr_array.compressor,
+            "filters": zarr_array.filters,
+        }
+        # _FillValue needs to be in attributes, not encoding, so it will get
+        # picked up by decode_cf
+        if zarr_array.fill_value is not None:
+            attributes["_FillValue"] = zarr_array.fill_value
+
+        return Variable(dimensions, data, attributes, encoding)
+
+
+class KvikioBackendEntrypoint(ZarrBackendEntrypoint):
+    """xarray backend entrypoint registered as ``engine="kvikio"``."""
+
+    available = has_kvikio
+
+    # disabled by default
+    # We need to provide this because of the subclassing from
+    # ZarrBackendEntrypoint
+    def guess_can_open(self, filename_or_obj):
+        return False
+
+    def open_dataset(
+        self,
+        filename_or_obj,
+        mask_and_scale=True,
+        decode_times=True,
+        concat_characters=True,
+        decode_coords=True,
+        drop_variables=None,
+        use_cftime=None,
+        decode_timedelta=None,
+        group=None,
+        mode="r",
+        synchronizer=None,
+        consolidated=None,
+        chunk_store=None,
+        storage_options=None,
+        stacklevel=3,
+    ):
+        """Open a zarr store through ``GDSZarrStore`` and decode it."""
+
+        filename_or_obj = _normalize_path(filename_or_obj)
+        store = GDSZarrStore.open_group(
+            filename_or_obj,
+            group=group,
+            mode=mode,
+            synchronizer=synchronizer,
+            consolidated=consolidated,
+            consolidate_on_close=False,
+            chunk_store=chunk_store,
+            storage_options=storage_options,
+            stacklevel=stacklevel + 1,
+        )
+
+        store_entrypoint = StoreBackendEntrypoint()
+        with close_on_error(store):
+            ds = store_entrypoint.open_dataset(
+                store,
+                mask_and_scale=mask_and_scale,
+                decode_times=decode_times,
+                concat_characters=concat_characters,
+                decode_coords=decode_coords,
+                drop_variables=drop_variables,
+                use_cftime=use_cftime,
+                decode_timedelta=decode_timedelta,
+            )
+        return ds
diff --git a/setup.py b/setup.py
index 8f8d92a..0fc6cef 100644
--- a/setup.py
+++ b/setup.py @@ -22,4 +22,7 @@ ], python_requires=">=3.6", install_requires=requirements, + entry_points={ + "xarray.backends": ["kvikio=cupy_xarray.kvikio:KvikioBackendEntrypoint"], + }, ) From aa2dc91649663d3f6732011230d8dc77ecb5ad61 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 2 Aug 2022 15:47:32 -0600 Subject: [PATCH 02/30] Add demo notebook --- docs/kvikio.ipynb | 2360 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2360 insertions(+) create mode 100644 docs/kvikio.ipynb diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb new file mode 100644 index 0000000..2847780 --- /dev/null +++ b/docs/kvikio.ipynb @@ -0,0 +1,2360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", + "metadata": {}, + "source": [ + "# Kvikio experiment\n", + "\n", + "To get this to work we need\n", + "1. https://github.com/zarr-developers/zarr-python/pull/934\n", + "2. https://github.com/dcherian/xarray/tree/kvikio" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c9ee3a73-6f7b-4875-b5a6-2e6d48fade44", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The watermark extension is already loaded. 
To reload it, use:\n", + " %reload_ext watermark\n", + "zarr : 2.12.1.dev38\n", + "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:10) \n", + "[GCC 10.3.0]\n", + "numpy : 1.23.1\n", + "json : 2.0.9\n", + "xarray: 2022.6.1.dev7+g3f7cc2da3\n", + "pandas: 1.4.3\n", + "\n" + ] + } + ], + "source": [ + "%load_ext watermark\n", + "\n", + "# These imports are currently unnecessary.\n", + "# cupy_xarray registers the kvikio entrypoint on install.\n", + "#import cupy as cp\n", + "#import cupy_xarray\n", + "#import kvikio.zarr\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "import zarr\n", + "\n", + "store = \"./air-temperature.zarr\"\n", + "\n", + "%watermark -iv" + ] + }, + { + "cell_type": "markdown", + "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", + "metadata": {}, + "source": [ + "## Create example dataset\n", + "\n", + "at the momment this needs xarray released version + pooch + netCDF4\n", + "\n", + "- Something is broken on the dcherian/kvikio branch\n", + "- cannot be compressed\n", + "- must read with consolidated=False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", + "metadata": {}, + "outputs": [], + "source": [ + "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", + "for var in airt.variables:\n", + " airt[var].encoding[\"compressor\"] = None\n", + "airt.to_zarr(store, mode=\"w\", consolidated=False)" + ] + }, + { + "cell_type": "markdown", + "id": "883d5507-988f-453a-b576-87bb563b540f", + "metadata": {}, + "source": [ + "## Test opening\n", + "\n", + "### Standard usage" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/glade/u/home/dcherian/python/xarray/xarray/backends/plugins.py:117: RuntimeWarning: 'netcdf4' fails while guessing\n", + " warnings.warn(f\"{engine!r} fails 
while guessing\", RuntimeWarning)\n", + "/glade/scratch/dcherian/tmp/ipykernel_172003/3542870433.py:1: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:\n", + "1. Consolidating metadata in this existing store with zarr.consolidate_metadata().\n", + "2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or\n", + "3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.\n", + " xr.open_dataset(store).air\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.open_dataset(store).air" + ] + }, + { + "cell_type": "markdown", + "id": "95161182-6b58-4dbd-9752-9961c251be1a", + "metadata": {}, + "source": [ + "### Now with kvikio!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Data variables:\n",
+       "    air      (time, lat, lon) float32 ...\n",
+       "Attributes:\n",
+       "    Conventions:  COARDS\n",
+       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
+       "    platform:     Model\n",
+       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
+       "    title:        4x daily NMC reanalysis (1948)
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": {}, + "source": [ + "## Lazy reading" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.air" + ] + }, + { + "cell_type": "markdown", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "metadata": {}, + "source": [ + "## Data load for repr" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "00205e73-9b43-4254-9cba-f75435251391", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (lon: 53)>\n",
+       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
+       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
+       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
+       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
+       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
+       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
+       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
+       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
+       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
+       "Coordinates:\n",
+       "    lat      float32 nan\n",
+       "Dimensions without coordinates: lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", + "Coordinates:\n", + " lat float32 nan\n", + "Dimensions without coordinates: lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[\"air\"].isel(time=0, lat=10)" + ] + }, + { + "cell_type": "markdown", + "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "metadata": {}, + "source": [ + "## Load to host" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
+       "         238.59999],\n",
+       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
+       "         239.29999],\n",
+       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
+       "         241.7    ],\n",
+       "        ...,\n",
+       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
+       "         294.69998],\n",
+       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
+       "         295.19998],\n",
+       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
+       "         296.6    ]],\n",
+       "\n",
+       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
+       "         235.79999],\n",
+       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
+       "         235.7    ],\n",
+       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
+       "         238.5    ],\n",
+       "...\n",
+       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
+       "         294.29   ],\n",
+       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
+       "         294.38998],\n",
+       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
+       "         295.19   ]],\n",
+       "\n",
+       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
+       "         241.79   ],\n",
+       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
+       "         241.68999],\n",
+       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
+       "         246.29   ],\n",
+       "        ...,\n",
+       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
+       "         294.69   ],\n",
+       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
+       "         295.19   ],\n",
+       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
+       "         295.69   ]]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", + " 238.59999],\n", + " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", + " 239.29999],\n", + " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", + " 241.7 ],\n", + " ...,\n", + " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", + " 294.69998],\n", + " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", + " 295.19998],\n", + " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", + " 296.6 ]],\n", + "\n", + " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", + " 235.79999],\n", + " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", + " 235.7 ],\n", + " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", + " 238.5 ],\n", + "...\n", + " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", + " 294.29 ],\n", + " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", + " 294.38998],\n", + " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", + " 295.19 ]],\n", + "\n", + " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", + " 241.79 ],\n", + " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", + " 241.68999],\n", + " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", + " 246.29 ],\n", + " ...,\n", + " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", + " 294.69 ],\n", + " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", + " 295.19 ],\n", + " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", + " 295.69 ]]], dtype=float32)\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... 
nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(type(ds.air.data), type(ds.air.as_numpy().data))\n", + "ds.air.as_numpy()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:miniconda3-kvikio_env]", + "language": "python", + "name": "conda-env-miniconda3-kvikio_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 7fb4b946534ae9730375ba368349d4fd037550de Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 16 Aug 2022 11:52:27 -0600 Subject: [PATCH 03/30] Update kvikio notebook --- docs/kvikio.ipynb | 878 ++++++++++++++++++++++++++++++---------------- 1 file changed, 577 insertions(+), 301 deletions(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 2847780..1ddd5e0 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,16 +5,16 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio experiment\n", + "# Kvikio demo\n", "\n", "To get this to work we need\n", "1. https://github.com/zarr-developers/zarr-python/pull/934\n", - "2. https://github.com/dcherian/xarray/tree/kvikio" + "2. 
https://github.com/pydata/xarray/pull/6874" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "id": "c9ee3a73-6f7b-4875-b5a6-2e6d48fade44", "metadata": { "tags": [] @@ -24,15 +24,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "The watermark extension is already loaded. To reload it, use:\n", - " %reload_ext watermark\n", - "zarr : 2.12.1.dev38\n", - "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:10) \n", + "flox : 0.5.10.dev5+g44f3851.d20220816\n", + "cupy : 11.0.0\n", + "json : 2.0.9\n", + "cupy_xarray : 0.1.0+11.gaa2dc91.dirty\n", + "numpy : 1.22.4\n", + "zarr : 2.12.1.dev68\n", + "numpy_groupies: 0.9.19+1.g8f14bbf\n", + "kvikio : 22.10.0a0+22.gd063a3b\n", + "xarray : 2022.6.1.dev51+g5a9a51ba1\n", + "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) \n", "[GCC 10.3.0]\n", - "numpy : 1.23.1\n", - "json : 2.0.9\n", - "xarray: 2022.6.1.dev7+g3f7cc2da3\n", - "pandas: 1.4.3\n", "\n" ] } @@ -40,12 +42,15 @@ "source": [ "%load_ext watermark\n", "\n", - "# These imports are currently unnecessary.\n", + "# These imports are currently unnecessary. 
I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", - "#import cupy as cp\n", - "#import cupy_xarray\n", - "#import kvikio.zarr\n", + "import cupy as cp\n", + "#import cudf\n", + "import cupy_xarray\n", + "import kvikio.zarr\n", "\n", + "import flox\n", + "import numpy_groupies\n", "import numpy as np\n", "import xarray as xr\n", "import zarr\n", @@ -58,28 +63,50 @@ { "cell_type": "markdown", "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Create example dataset\n", "\n", - "at the momment this needs xarray released version + pooch + netCDF4\n", - "\n", - "- Something is broken on the dcherian/kvikio branch\n", - "- cannot be compressed\n", - "- must read with consolidated=False" + "- cannot be compressed" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", - "metadata": {}, - "outputs": [], + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/glade/u/home/dcherian/python/xarray/xarray/core/dataset.py:2066: SerializationWarning: saving variable None with floating point data as an integer dtype without any _FillValue to use for NaNs\n", + " return to_zarr( # type: ignore\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", "for var in airt.variables:\n", " airt[var].encoding[\"compressor\"] = None\n", - "airt.to_zarr(store, mode=\"w\", consolidated=False)" + "airt.to_zarr(store, mode=\"w\", consolidated=True)" ] }, { @@ -94,23 +121,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "58063142-b69b-46a5-9e4d-a83944e57857", "metadata": {}, "outputs": [ - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "/glade/u/home/dcherian/python/xarray/xarray/backends/plugins.py:117: RuntimeWarning: 'netcdf4' fails while guessing\n", - " warnings.warn(f\"{engine!r} fails while guessing\", RuntimeWarning)\n", - "/glade/scratch/dcherian/tmp/ipykernel_172003/3542870433.py:1: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:\n", - "1. Consolidating metadata in this existing store with zarr.consolidate_metadata().\n", - "2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or\n", - "3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.\n", - " xr.open_dataset(store).air\n" - ] - }, { "data": { "text/html": [ @@ -469,8 +483,9 @@ "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
        "[3869000 values with dtype=float32]\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -482,15 +497,25 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" + " var_desc: Air temperature" ], "text/plain": [ "\n", "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -505,13 +530,13 @@ " var_desc: Air temperature" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "xr.open_dataset(store).air" + "xr.open_dataset(store, engine=\"zarr\").air" ] }, { @@ -519,14 +544,38 @@ "id": "95161182-6b58-4dbd-9752-9961c251be1a", "metadata": {}, "source": [ - "### Now with kvikio!" + "### Now with kvikio!\n", + "\n", + " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", + " - dask.from_zarr to GDSStore / open_mfdataset" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", "metadata": {}, + "outputs": [], + "source": [ + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": {}, + "source": [ + "## Lazy reading" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "metadata": {}, "outputs": [ { "data": { @@ -883,61 +932,77 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
+       "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
-       "Data variables:\n",
-       "    air      (time, lat, lon) float32 ...\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
-       "    Conventions:  COARDS\n",
-       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
-       "    platform:     Model\n",
-       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
-       "    title:        4x daily NMC reanalysis (1948)
" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature
" ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", - "Data variables:\n", - " air (time, lat, lon) float32 ...\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "ds" + "ds.air" ] }, { "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", "metadata": {}, "source": [ - "## Lazy reading" + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "execution_count": 4, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -1295,11 +1360,20 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
-       "[3869000 values with dtype=float32]\n",
+       "
<xarray.DataArray 'air' (lon: 53)>\n",
+       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
+       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
+       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
+       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
+       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
+       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
+       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
+       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
+       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
+       "    lat      float32 50.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "    time     datetime64[ns] 2013-01-01\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -1311,15 +1385,36 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" + " var_desc: Air temperature
" ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", + " lat float32 50.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " time datetime64[ns] 2013-01-01\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -1334,27 +1429,77 @@ " var_desc: Air temperature" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" + "ds[\"air\"].isel(time=0, lat=10)" ] }, { "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", "metadata": {}, "source": [ - "## Data load for repr" + "## CuPy array on load" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 8, + "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds[\"air\"].isel(time=0, lat=10).data)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "db69559c-1fde-4b3b-914d-87d8437ec256", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds[\"air\"].isel(time=0, lat=10).load().data)" + ] + }, + { + "cell_type": "markdown", + "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "metadata": {}, + "source": [ + "## Load to host" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", "metadata": {}, "outputs": [ { @@ -1712,19 +1857,52 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (lon: 53)>\n",
-       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
-       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
-       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
-       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
-       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
-       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
-       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
-       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
-       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
+       "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
+       "         238.59999],\n",
+       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
+       "         239.29999],\n",
+       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
+       "         241.7    ],\n",
+       "        ...,\n",
+       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
+       "         294.69998],\n",
+       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
+       "         295.19998],\n",
+       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
+       "         296.6    ]],\n",
+       "\n",
+       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
+       "         235.79999],\n",
+       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
+       "         235.7    ],\n",
+       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
+       "         238.5    ],\n",
+       "...\n",
+       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
+       "         294.29   ],\n",
+       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
+       "         294.38998],\n",
+       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
+       "         295.19   ]],\n",
+       "\n",
+       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
+       "         241.79   ],\n",
+       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
+       "         241.68999],\n",
+       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
+       "         246.29   ],\n",
+       "        ...,\n",
+       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
+       "         294.69   ],\n",
+       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
+       "         295.19   ],\n",
+       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
+       "         295.69   ]]], dtype=float32)\n",
        "Coordinates:\n",
-       "    lat      float32 nan\n",
-       "Dimensions without coordinates: lon\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -1736,36 +1914,111 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" - ], - "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", - "Coordinates:\n", - " lat float32 nan\n", - "Dimensions without coordinates: lon\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", + " var_desc: Air temperature
" + ], + "text/plain": [ + "\n", + "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", + " 238.59999],\n", + " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", + " 239.29999],\n", + " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", + " 241.7 ],\n", + " ...,\n", + " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", + " 294.69998],\n", + " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", + " 295.19998],\n", + " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", + " 296.6 ]],\n", + "\n", + " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", + " 235.79999],\n", + " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", + " 235.7 ],\n", + " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", + " 238.5 ],\n", + "...\n", + " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", + " 294.29 ],\n", + " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", + " 294.38998],\n", + " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", + " 295.19 ]],\n", + "\n", + " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", + " 241.79 ],\n", + " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", + " 241.68999],\n", + " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", + " 246.29 ],\n", + " ...,\n", + " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", + " 294.69 ],\n", + " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", + " 295.19 ],\n", + " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", + " 295.69 ]]], dtype=float32)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 
2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", " long_name: 4xDaily Air temperature at sigma level 995\n", " parent_stat: Other\n", " precision: 2\n", @@ -1774,36 +2027,76 @@ " var_desc: Air temperature" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10)" + "ds.air.as_numpy()" ] }, { - "cell_type": "markdown", - "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "cell_type": "code", + "execution_count": 11, + "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Load to host" + "type(ds.air.as_numpy().data)" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", + "execution_count": 12, + "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " \n" - ] - }, + "data": { + "text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds.air.mean(\"time\").load().data)" + ] + }, + { + "cell_type": "markdown", + "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "metadata": {}, + "source": [ + "## GroupBy with flox\n", + "\n", + "Requires\n", + "\n", + "1. flox main branch?\n", + "2. https://github.com/ml31415/numpy-groupies/pull/63" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -2159,181 +2452,164 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
-       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
-       "         238.59999],\n",
-       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
-       "         239.29999],\n",
-       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
-       "         241.7    ],\n",
+       "
<xarray.DataArray 'air' (month: 12, lat: 25, lon: 53)>\n",
+       "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n",
+       "         245.64653],\n",
+       "        [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n",
+       "         246.7545 ],\n",
+       "        [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n",
+       "         251.56555],\n",
        "        ...,\n",
-       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
-       "         294.69998],\n",
-       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
-       "         295.19998],\n",
-       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
-       "         296.6    ]],\n",
-       "\n",
-       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
-       "         235.79999],\n",
-       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
-       "         235.7    ],\n",
-       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
-       "         238.5    ],\n",
+       "        [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n",
+       "         294.04868],\n",
+       "        [296.54468, 296.47   , 296.16025, ..., 295.35614, 295.0814 ,\n",
+       "         294.53015],\n",
+       "        [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n",
+       "         295.63678]],\n",
+       "\n",
+       "       [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n",
+       "         244.44365],\n",
+       "        [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n",
+       "         245.06642],\n",
+       "        [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n",
+       "         249.72244],\n",
        "...\n",
-       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
-       "         294.29   ],\n",
-       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
-       "         294.38998],\n",
-       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
-       "         295.19   ]],\n",
-       "\n",
-       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
-       "         241.79   ],\n",
-       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
-       "         241.68999],\n",
-       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
-       "         246.29   ],\n",
+       "        [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n",
+       "         297.16125],\n",
+       "        [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194  ,\n",
+       "         297.90833],\n",
+       "        [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n",
+       "         298.81894]],\n",
+       "\n",
+       "       [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n",
+       "         242.62805],\n",
+       "        [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n",
+       "         244.11601],\n",
+       "        [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n",
+       "         247.06967],\n",
        "        ...,\n",
-       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
-       "         294.69   ],\n",
-       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
-       "         295.19   ],\n",
-       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
-       "         295.69   ]]], dtype=float32)\n",
+       "        [296.76517, 295.97668, 295.88925, ..., 296.456  , 296.09137,\n",
+       "         295.65768],\n",
+       "        [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n",
+       "         296.52142],\n",
+       "        [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n",
+       "         297.53772]]], dtype=float32)\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
-       "Attributes:\n",
-       "    GRIB_id:       11\n",
-       "    GRIB_name:     TMP\n",
-       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
-       "    dataset:       NMC Reanalysis\n",
-       "    level_desc:    Surface\n",
-       "    long_name:     4xDaily Air temperature at sigma level 995\n",
-       "    parent_stat:   Other\n",
-       "    precision:     2\n",
-       "    statistic:     Individual Obs\n",
-       "    units:         degK\n",
-       "    var_desc:      Air temperature
    • lat
      (lat)
      float32
      75.0 72.5 70.0 ... 20.0 17.5 15.0
      array([75. , 72.5, 70. , 67.5, 65. , 62.5, 60. , 57.5, 55. , 52.5, 50. , 47.5,\n",
      +       "       45. , 42.5, 40. , 37.5, 35. , 32.5, 30. , 27.5, 25. , 22.5, 20. , 17.5,\n",
      +       "       15. ], dtype=float32)
    • lon
      (lon)
      float32
      200.0 202.5 205.0 ... 327.5 330.0
      array([200. , 202.5, 205. , 207.5, 210. , 212.5, 215. , 217.5, 220. , 222.5,\n",
      +       "       225. , 227.5, 230. , 232.5, 235. , 237.5, 240. , 242.5, 245. , 247.5,\n",
      +       "       250. , 252.5, 255. , 257.5, 260. , 262.5, 265. , 267.5, 270. , 272.5,\n",
      +       "       275. , 277.5, 280. , 282.5, 285. , 287.5, 290. , 292.5, 295. , 297.5,\n",
      +       "       300. , 302.5, 305. , 307.5, 310. , 312.5, 315. , 317.5, 320. , 322.5,\n",
      +       "       325. , 327.5, 330. ], dtype=float32)
    • month
      (month)
      int64
      1 2 3 4 5 6 7 8 9 10 11 12
      array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
  • " ], "text/plain": [ - "\n", - "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", - " 238.59999],\n", - " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", - " 239.29999],\n", - " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", - " 241.7 ],\n", + "\n", + "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n", + " 245.64653],\n", + " [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n", + " 246.7545 ],\n", + " [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n", + " 251.56555],\n", " ...,\n", - " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", - " 294.69998],\n", - " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", - " 295.19998],\n", - " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", - " 296.6 ]],\n", - "\n", - " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", - " 235.79999],\n", - " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", - " 235.7 ],\n", - " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", - " 238.5 ],\n", + " [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n", + " 294.04868],\n", + " [296.54468, 296.47 , 296.16025, ..., 295.35614, 295.0814 ,\n", + " 294.53015],\n", + " [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n", + " 295.63678]],\n", + "\n", + " [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n", + " 244.44365],\n", + " [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n", + " 245.06642],\n", + " [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n", + " 249.72244],\n", "...\n", - " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", - " 294.29 ],\n", - " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", - " 294.38998],\n", - " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", - " 295.19 ]],\n", - "\n", - " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", - " 241.79 ],\n", - " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", - " 
241.68999],\n", - " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", - " 246.29 ],\n", + " [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n", + " 297.16125],\n", + " [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194 ,\n", + " 297.90833],\n", + " [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n", + " 298.81894]],\n", + "\n", + " [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n", + " 242.62805],\n", + " [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n", + " 244.11601],\n", + " [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n", + " 247.06967],\n", " ...,\n", - " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", - " 294.69 ],\n", - " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", - " 295.19 ],\n", - " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", - " 295.69 ]]], dtype=float32)\n", + " [296.76517, 295.97668, 295.88925, ..., 296.456 , 296.09137,\n", + " 295.65768],\n", + " [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n", + " 296.52142],\n", + " [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n", + " 297.53772]]], dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 
322.5 325.0 327.5 330.0\n", + " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12" ] }, - "execution_count": 11, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "print(type(ds.air.data), type(ds.air.as_numpy().data))\n", - "ds.air.as_numpy()" + "ds.air.groupby(\"time.month\").mean(engine=\"numpy\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:miniconda3-kvikio_env]", + "display_name": "Python [conda env:miniconda3-kvikio_nightly]", "language": "python", - "name": "conda-env-miniconda3-kvikio_env-py" + "name": "conda-env-miniconda3-kvikio_nightly-py" }, "language_info": { "codemirror_mode": { From facf5f73c52e1aecb7061715ef46d76ae551a4f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:27:13 +0000 Subject: [PATCH 04/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/kvikio.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 1ddd5e0..72723ea 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -45,7 +45,8 @@ "# These imports are currently unnecessary. 
I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", "import cupy as cp\n", - "#import cudf\n", + "\n", + "# import cudf\n", "import cupy_xarray\n", "import kvikio.zarr\n", "\n", From f3f51891b79f1e46f7855c5708d55b271313da32 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 17 Aug 2022 11:56:39 -0600 Subject: [PATCH 05/30] Update cupy_xarray/kvikio.py --- cupy_xarray/kvikio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 313a249..e806865 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -1,4 +1,5 @@ import os +import warnings import cupy as cp import numpy as np From d2da1e4c7b23923aca8969c25531a1817b498de1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 20 Jan 2023 17:03:07 -0700 Subject: [PATCH 06/30] Add url, description. --- cupy_xarray/kvikio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e806865..e6c7c63 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -147,6 +147,8 @@ def open_store_variable(self, name, zarr_array): class KvikioBackendEntrypoint(ZarrBackendEntrypoint): available = has_kvikio + description = "Open zarr files (.zarr) using Kvikio" + url = "https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr" # disabled by default # We need to provide this because of the subclassing from From b87c3c2d6295b05b4416d5aa2e0f93440547c90d Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Aug 2023 16:28:40 -0600 Subject: [PATCH 07/30] Working --- cupy_xarray/kvikio.py | 24 +- docs/kvikio.ipynb | 2216 ++++++++++++++++++++++++++++++++++------- 2 files changed, 1874 insertions(+), 366 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e6c7c63..669978f 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -20,6 +20,24 @@ has_kvikio = False +class DummyZarrArrayWrapper(ZarrArrayWrapper): + def __init__(self, array: 
np.ndarray): + assert isinstance(array, np.ndarray) + self._array = array + self.filters = None + self.dtype = array.dtype + self.shape = array.shape + + def __array__(self): + return self._array + + def get_array(self): + return self._array + + def __getitem__(self, key): + return self._array[key] + + class CupyZarrArrayWrapper(ZarrArrayWrapper): def __array__(self): return self.get_array() @@ -32,7 +50,8 @@ def __array__(self): return self.datastore.zarr_group[self.variable_name][:].get() def get_array(self): - return np.asarray(self) + # total hack: make a numpy array look like a Zarr array + return DummyZarrArrayWrapper(self.datastore.zarr_group[self.variable_name][:].get()) class GDSZarrStore(ZarrStore): @@ -52,7 +71,6 @@ def open_group( safe_chunks=True, stacklevel=2, ): - # zarr doesn't support pathlib.Path objects yet. zarr-python#601 if isinstance(store, os.PathLike): store = os.fspath(store) @@ -112,7 +130,6 @@ def open_group( ) def open_store_variable(self, name, zarr_array): - try_nczarr = self._mode == "r" dimensions, attributes = zarr_backend._get_zarr_dims_and_attrs( zarr_array, zarr_backend.DIMENSION_KEY, try_nczarr @@ -174,7 +191,6 @@ def open_dataset( storage_options=None, stacklevel=3, ): - filename_or_obj = _normalize_path(filename_or_obj) store = GDSZarrStore.open_group( filename_or_obj, diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 72723ea..4867878 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,11 +5,7 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio demo\n", - "\n", - "To get this to work we need\n", - "1. https://github.com/zarr-developers/zarr-python/pull/934\n", - "2. 
https://github.com/pydata/xarray/pull/6874" + "# Kvikio demo" ] }, { @@ -24,17 +20,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "flox : 0.5.10.dev5+g44f3851.d20220816\n", - "cupy : 11.0.0\n", + "flox : 0.7.3.dev12+g796dcd2\n", "json : 2.0.9\n", - "cupy_xarray : 0.1.0+11.gaa2dc91.dirty\n", - "numpy : 1.22.4\n", - "zarr : 2.12.1.dev68\n", - "numpy_groupies: 0.9.19+1.g8f14bbf\n", - "kvikio : 22.10.0a0+22.gd063a3b\n", - "xarray : 2022.6.1.dev51+g5a9a51ba1\n", - "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) \n", - "[GCC 10.3.0]\n", + "xarray : 2023.7.0\n", + "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", + "kvikio : 23.2.0\n", + "zarr : 2.16.0\n", + "numpy : 1.24.4\n", + "sys : 3.9.17 | packaged by conda-forge | (main, Aug 10 2023, 07:02:31) \n", + "[GCC 12.3.0]\n", + "numpy_groupies: 0.9.22+2.gd148074\n", "\n" ] } @@ -44,10 +39,9 @@ "\n", "# These imports are currently unnecessary. I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", - "import cupy as cp\n", - "\n", + "# import cupy as cp\n", "# import cudf\n", - "import cupy_xarray\n", + "import cupy_xarray # registers cupy accessor\n", "import kvikio.zarr\n", "\n", "import flox\n", @@ -62,68 +56,1482 @@ ] }, { - "cell_type": "markdown", - "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", - "metadata": { - "tags": [] - }, + "cell_type": "code", + "execution_count": 2, + "id": "83b1b514-eeb8-4a81-a3e8-3a7dc82ffce4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'kvikio': \n", + " Open zarr files (.zarr) using Kvikio\n", + " Learn more at https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr,\n", + " 'store': \n", + " Open AbstractDataStore instances in Xarray\n", + " Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html,\n", + " 'zarr': \n", + " Open zarr files (.zarr) using zarr in Xarray\n", + " Learn more at 
https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.backends.list_engines()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "81b2e5cb-4b2d-4a31-b7a0-961aadbc321d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + ] + }, + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Data variables:\n",
    +       "    air      (time, lat, lon) float32 ...\n",
    +       "    scalar   float64 ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  COARDS\n",
    +       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    +       "    platform:     Model\n",
    +       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    +       "    title:        4x daily NMC reanalysis (1948)
    " + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%autoreload\n", + "\n", + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Create example dataset\n", + "\n", + "- cannot be compressed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", + "for var in airt.variables:\n", + " airt[var].encoding[\"compressor\"] = None\n", + "airt[\"scalar\"] = 12.0\n", + "airt.to_zarr(store, mode=\"w\", consolidated=True)" + ] + }, + { + "cell_type": "markdown", + "id": "883d5507-988f-453a-b576-87bb563b540f", + "metadata": {}, + "source": [ + "## Test opening\n", + "\n", + "### Standard usage" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.open_dataset(store, engine=\"zarr\").air" + ] + }, + { + "cell_type": "markdown", + "id": "95161182-6b58-4dbd-9752-9961c251be1a", + "metadata": {}, + "source": [ + "### Now with kvikio!\n", + "\n", + " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", + " - dask.from_zarr to GDSStore / open_mfdataset" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + ] + }, + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Data variables:\n",
    +       "    air      (time, lat, lon) float32 ...\n",
    +       "    scalar   float64 ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  COARDS\n",
    +       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    +       "    platform:     Model\n",
    +       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    +       "    title:        4x daily NMC reanalysis (1948)
    " + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Create example dataset\n", - "\n", - "- cannot be compressed" + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", - "metadata": { - "jupyter": { - "source_hidden": true - }, - "tags": [] - }, + "execution_count": 26, + "id": "6c939a04-1588-4693-9483-c6ad7152951a", + "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/glade/u/home/dcherian/python/xarray/xarray/core/dataset.py:2066: SerializationWarning: saving variable None with floating point data as an integer dtype without any _FillValue to use for NaNs\n", - " return to_zarr( # type: ignore\n" - ] - }, { "data": { "text/plain": [ - "" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=, key=BasicIndexer(()))))" ] }, - "execution_count": 11, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", - "for var in 
airt.variables:\n", - " airt[var].encoding[\"compressor\"] = None\n", - "airt.to_zarr(store, mode=\"w\", consolidated=True)" + "ds.scalar.variable._data" ] }, { "cell_type": "markdown", - "id": "883d5507-988f-453a-b576-87bb563b540f", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", "metadata": {}, "source": [ - "## Test opening\n", - "\n", - "### Standard usage" + "## Lazy reading" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "execution_count": 27, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", "metadata": {}, "outputs": [ { @@ -390,6 +1798,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -411,14 +1824,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -428,13 +1843,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -472,7 +1890,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -498,17 +1917,37 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -531,51 +1970,27 @@ " var_desc: Air temperature" ] }, - "execution_count": 4, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "xr.open_dataset(store, engine=\"zarr\").air" - ] - }, - { - "cell_type": "markdown", - "id": "95161182-6b58-4dbd-9752-9961c251be1a", - "metadata": {}, - "source": [ - "### Now with kvikio!\n", - "\n", - " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", - " - dask.from_zarr to GDSStore / open_mfdataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", - "metadata": {}, - "outputs": [], - "source": [ - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "print(ds.air._variable._data)\n", - "ds" + "ds.air" ] }, { "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", "metadata": {}, "source": [ - "## Lazy reading" + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "execution_count": 31, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -842,6 +2257,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -863,14 +2283,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ 
-880,13 +2302,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -924,7 +2349,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -933,12 +2359,21 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.DataArray 'air' (lon: 53)>\n",
    +       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    +       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    +       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    +       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    +       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    +       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    +       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    +       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    +       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    +       "      dtype=float32)\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "    lat      float32 50.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "    time     datetime64[ns] 2013-01-01\n",
            "Attributes:\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    @@ -950,25 +2385,44 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • time
    ()
    datetime64[ns]
    2013-01-01
    long_name :
    Time
    standard_name :
    time
    array('2013-01-01T00:00:00.000000000', dtype='datetime64[ns]')
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", + " dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " lat float32 50.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " time datetime64[ns] 2013-01-01\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -983,27 +2437,19 @@ " var_desc: Air temperature" ] }, - "execution_count": 3, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" - ] - }, - { - "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", - "metadata": {}, - "source": [ - "## Data load for repr" + "ds[\"air\"].isel(time=0, lat=10).load()" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 29, + "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", "metadata": {}, "outputs": [ { @@ -1270,6 +2716,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -1291,14 +2742,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: 
var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -1308,13 +2761,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -1352,7 +2808,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -1361,82 +2818,21 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (lon: 53)>\n",
    -       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    -       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    -       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    -       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    -       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    -       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    -       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    -       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    -       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
    -       "Coordinates:\n",
    -       "    lat      float32 50.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "    time     datetime64[ns] 2013-01-01\n",
    -       "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", - "Coordinates:\n", - " lat float32 50.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " time datetime64[ns] 2013-01-01\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + "\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 4, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10)" + "ds.scalar" ] }, { @@ -1449,17 +2845,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 32, "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 8, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1470,17 +2866,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 33, "id": "db69559c-1fde-4b3b-914d-87d8437ec256", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 9, + "execution_count": 
33, "metadata": {}, "output_type": "execute_result" } @@ -1499,7 +2895,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 34, "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", "metadata": {}, "outputs": [ @@ -1767,6 +3163,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -1788,14 +3189,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -1805,13 +3208,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -1849,7 +3255,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -1915,7 +3322,7 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -2028,7 +3455,7 @@ " var_desc: Air temperature" ] }, - "execution_count": 10, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2039,7 +3466,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 35, "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, "outputs": [ @@ -2049,7 +3476,7 @@ "numpy.ndarray" ] }, - "execution_count": 11, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2060,17 +3487,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 36, "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 12, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -2081,21 +3508,16 @@ }, { "cell_type": "markdown", - "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "id": "cab539a7-d952-4b38-b515-712c52c62501", "metadata": {}, "source": [ - "## GroupBy with flox\n", - "\n", - "Requires\n", - "\n", - "1. flox main branch?\n", - "2. 
https://github.com/ml31415/numpy-groupies/pull/63" + "## Chunk with dask" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "execution_count": 37, + "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", "metadata": {}, "outputs": [ { @@ -2362,6 +3784,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -2383,14 +3810,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -2400,13 +3829,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -2444,7 +3876,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -2453,164 +3886,223 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (month: 12, lat: 25, lon: 53)>\n",
    -       "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n",
    -       "         245.64653],\n",
    -       "        [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n",
    -       "         246.7545 ],\n",
    -       "        [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n",
    -       "         251.56555],\n",
    -       "        ...,\n",
    -       "        [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n",
    -       "         294.04868],\n",
    -       "        [296.54468, 296.47   , 296.16025, ..., 295.35614, 295.0814 ,\n",
    -       "         294.53015],\n",
    -       "        [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n",
    -       "         295.63678]],\n",
    -       "\n",
    -       "       [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n",
    -       "         244.44365],\n",
    -       "        [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n",
    -       "         245.06642],\n",
    -       "        [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n",
    -       "         249.72244],\n",
    -       "...\n",
    -       "        [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n",
    -       "         297.16125],\n",
    -       "        [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194  ,\n",
    -       "         297.90833],\n",
    -       "        [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n",
    -       "         298.81894]],\n",
    -       "\n",
    -       "       [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n",
    -       "         242.62805],\n",
    -       "        [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n",
    -       "         244.11601],\n",
    -       "        [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n",
    -       "         247.06967],\n",
    -       "        ...,\n",
    -       "        [296.76517, 295.97668, 295.88925, ..., 296.456  , 296.09137,\n",
    -       "         295.65768],\n",
    -       "        [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n",
    -       "         296.52142],\n",
    -       "        [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n",
    -       "         297.53772]]], dtype=float32)\n",
    +       "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * month    (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
  • time
    (time)
    datetime64[ns]
    2013-01-01 ... 2014-12-31T18:00:00
    long_name :
    Time
    standard_name :
    time
    array(['2013-01-01T00:00:00.000000000', '2013-01-01T06:00:00.000000000',\n",
    +       "       '2013-01-01T12:00:00.000000000', ..., '2014-12-31T06:00:00.000000000',\n",
    +       "       '2014-12-31T12:00:00.000000000', '2014-12-31T18:00:00.000000000'],\n",
    +       "      dtype='datetime64[ns]')
    • air
      (time, lat, lon)
      float32
      dask.array<chunksize=(10, 25, 53), meta=np.ndarray>
      GRIB_id :
      11
      GRIB_name :
      TMP
      actual_range :
      [185.16000366210938, 322.1000061035156]
      dataset :
      NMC Reanalysis
      level_desc :
      Surface
      long_name :
      4xDaily Air temperature at sigma level 995
      parent_stat :
      Other
      precision :
      2
      statistic :
      Individual Obs
      units :
      degK
      var_desc :
      Air temperature
      \n", + " \n", + " \n", + " \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      Array Chunk
      Bytes 14.76 MiB 51.76 kiB
      Shape (2920, 25, 53) (10, 25, 53)
      Dask graph 292 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", + "
      \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " 53\n", + " 25\n", + " 2920\n", + "\n", + "
    • scalar
      ()
      float64
      ...
      [1 values with dtype=float64]
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • Conventions :
    COARDS
    description :
    Data is from NMC initialized reanalysis\n", + "(4x/day). These are the 0.9950 sigma level values.
    platform :
    Model
    references :
    http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html
    title :
    4x daily NMC reanalysis (1948)
  • " ], "text/plain": [ - "\n", - "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n", - " 245.64653],\n", - " [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n", - " 246.7545 ],\n", - " [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n", - " 251.56555],\n", - " ...,\n", - " [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n", - " 294.04868],\n", - " [296.54468, 296.47 , 296.16025, ..., 295.35614, 295.0814 ,\n", - " 294.53015],\n", - " [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n", - " 295.63678]],\n", - "\n", - " [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n", - " 244.44365],\n", - " [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n", - " 245.06642],\n", - " [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n", - " 249.72244],\n", - "...\n", - " [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n", - " 297.16125],\n", - " [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194 ,\n", - " 297.90833],\n", - " [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n", - " 298.81894]],\n", - "\n", - " [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n", - " 242.62805],\n", - " [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n", - " 244.11601],\n", - " [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n", - " 247.06967],\n", - " ...,\n", - " [296.76517, 295.97668, 295.88925, ..., 296.456 , 296.09137,\n", - " 295.65768],\n", - " [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n", - " 296.52142],\n", - " [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n", - " 297.53772]]], dtype=float32)\n", + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 
322.5 325.0 327.5 330.0\n", - " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12" + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 25, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air.groupby(\"time.month\").mean(engine=\"numpy\")" + "ds.chunk(time=10)" + ] + }, + { + "cell_type": "markdown", + "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "metadata": {}, + "source": [ + "## GroupBy with flox\n", + "\n", + "Requires\n", + "\n", + "1. flox main branch?\n", + "2. https://github.com/ml31415/numpy-groupies/pull/63" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "metadata": {}, + "outputs": [], + "source": [ + "ds.air.groupby(\"time.month\").mean()" ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:miniconda3-kvikio_nightly]", + "display_name": "miniconda3-kvikio_env", "language": "python", - "name": "conda-env-miniconda3-kvikio_nightly-py" + "name": "conda-env-miniconda3-kvikio_env-py" }, "language_info": { "codemirror_mode": { @@ -2622,7 +4114,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.16" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From 87cb74eaad7aff14d6ec74a0962d2143967875f6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Aug 2023 12:09:57 -0600 Subject: [PATCH 08/30] Updated notebook --- docs/kvikio.ipynb | 1648 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 1275 insertions(+), 373 
deletions(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 4867878..09c5394 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,7 +5,11 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio demo" + "# Kvikio demo\n", + "\n", + "Requires\n", + "- [ ] https://github.com/pydata/xarray/pull/8100\n", + "- [ ] Some updates to `dask.array.core.getter`" ] }, { @@ -20,22 +24,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "flox : 0.7.3.dev12+g796dcd2\n", - "json : 2.0.9\n", - "xarray : 2023.7.0\n", - "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", + "Exception reporting mode: Minimal\n", "kvikio : 23.2.0\n", - "zarr : 2.16.0\n", + "xarray : 2022.6.1.dev458+g83c2919b2\n", + "numpy_groupies: 0.9.22+2.gd148074\n", + "json : 2.0.9\n", "numpy : 1.24.4\n", + "flox : 0.7.3.dev12+g796dcd2\n", + "zarr : 2.16.1\n", + "dask : 2023.8.1\n", + "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", "sys : 3.9.17 | packaged by conda-forge | (main, Aug 10 2023, 07:02:31) \n", "[GCC 12.3.0]\n", - "numpy_groupies: 0.9.22+2.gd148074\n", "\n" ] } ], "source": [ "%load_ext watermark\n", + "%xmode minimal\n", "\n", "# These imports are currently unnecessary. 
I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", @@ -50,6 +57,9 @@ "import xarray as xr\n", "import zarr\n", "\n", + "import dask\n", + "dask.config.set(scheduler=\"sync\")\n", + "\n", "store = \"./air-temperature.zarr\"\n", "\n", "%watermark -iv" @@ -86,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "id": "81b2e5cb-4b2d-4a31-b7a0-961aadbc321d", "metadata": {}, "outputs": [ @@ -94,7 +104,43 @@ "name": "stdout", "output_type": "stream", "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "ipdb> 
c\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "ipdb> c\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" ] }, { @@ -477,26 +523,26 @@ " description: Data is from NMC initialized reanalysis\\n(4x/day). 
These a...\n", " platform: Model\n", " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" ], "text/plain": [ @@ -528,7 +574,7 @@ " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 23, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +620,10 @@ { "cell_type": "markdown", "id": "883d5507-988f-453a-b576-87bb563b540f", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Test opening\n", "\n", @@ -583,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "id": "58063142-b69b-46a5-9e4d-a83944e57857", "metadata": {}, "outputs": [ @@ -970,26 +1019,26 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -1023,7 +1072,7 @@ " var_desc: Air temperature" ] }, - "execution_count": 24, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1045,7 +1094,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", "metadata": {}, "outputs": [ @@ -1053,7 +1102,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" ] }, { @@ -1436,26 +1485,26 @@ " description: Data is from NMC initialized reanalysis\\n(4x/day). 
These a...\n", " platform: Model\n", " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" ], "text/plain": [ @@ -1487,7 +1536,7 @@ " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1501,38 +1550,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "id": "6c939a04-1588-4693-9483-c6ad7152951a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=, key=BasicIndexer(()))))" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.scalar.variable._data" - ] - }, - { - "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", - "metadata": {}, - "source": [ - "## Lazy reading" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", - "metadata": {}, "outputs": [ { "data": { @@ -1900,97 +1920,38 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    -       "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + "\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 27, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" + "ds.scalar" ] }, { "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", - "metadata": {}, + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## Data load for repr" + "## Lazy reading" ] }, { "cell_type": "code", - "execution_count": 31, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 7, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", "metadata": {}, "outputs": [ { @@ -2359,21 +2320,12 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (lon: 53)>\n",
    -       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    -       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    -       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    -       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    -       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    -       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    -       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    -       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    -       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    -       "      dtype=float32)\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
            "Coordinates:\n",
    -       "    lat      float32 50.0\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "    time     datetime64[ns] 2013-01-01\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
            "Attributes:\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    @@ -2385,44 +2337,45 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • time
    PandasIndex
    PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
    +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
    +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
    +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
    +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
    +       "               ...\n",
    +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
    +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
    +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
    +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
    +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
    +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", - " dtype=float32)\n", + "\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " lat float32 50.0\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " time datetime64[ns] 2013-01-01\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -2437,19 +2390,30 @@ " var_desc: Air temperature" ] }, - "execution_count": 31, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10).load()" + "ds.air" + ] + }, + { + "cell_type": "markdown", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", + "execution_count": 8, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -2818,55 +2782,968 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'scalar' ()>\n",
    -       "[1 values with dtype=float64]
    " + "
    <xarray.DataArray 'air' (lon: 53)>\n",
    +       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    +       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    +       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    +       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    +       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    +       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    +       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    +       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    +       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    +       "      dtype=float32)\n",
    +       "Coordinates:\n",
    +       "    lat      float32 50.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "    time     datetime64[ns] 2013-01-01\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " ], "text/plain": [ - "\n", - "[1 values with dtype=float64]" + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", + " dtype=float32)\n", + "Coordinates:\n", + " lat float32 50.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " time datetime64[ns] 2013-01-01\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[\"air\"].isel(time=0, lat=10).load()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " + ], + "text/plain": [ + "\n", + "[1 values with dtype=float64]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.scalar" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ba48a2c0-96e0-41d7-9e07-381e05e8dc33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" ] }, - "execution_count": 29, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.scalar" + "ds.air" ] }, { "cell_type": "markdown", "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## CuPy array on load" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 11, "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy.ndarray" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((0, 10, slice(None, None, None))))))" ] }, - "execution_count": 32, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "type(ds[\"air\"].isel(time=0, lat=10).data)" + "ds[\"air\"].isel(time=0, lat=10).variable._data" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 12, "id": "db69559c-1fde-4b3b-914d-87d8437ec256", "metadata": {}, 
"outputs": [ @@ -2876,7 +3753,7 @@ "cupy.ndarray" ] }, - "execution_count": 33, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -2888,15 +3765,18 @@ { "cell_type": "markdown", "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Load to host" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", + "execution_count": 13, + "id": "09b40d7d-ed38-4a50-af11-c2e5f0242a97", "metadata": {}, "outputs": [ { @@ -3266,47 +4146,7 @@ " fill: currentColor;\n", "}\n", "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
    -       "         238.59999],\n",
    -       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
    -       "         239.29999],\n",
    -       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
    -       "         241.7    ],\n",
    -       "        ...,\n",
    -       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
    -       "         294.69998],\n",
    -       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
    -       "         295.19998],\n",
    -       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
    -       "         296.6    ]],\n",
    -       "\n",
    -       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
    -       "         235.79999],\n",
    -       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
    -       "         235.7    ],\n",
    -       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
    -       "         238.5    ],\n",
    -       "...\n",
    -       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
    -       "         294.29   ],\n",
    -       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
    -       "         294.38998],\n",
    -       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
    -       "         295.19   ]],\n",
    -       "\n",
    -       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
    -       "         241.79   ],\n",
    -       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
    -       "         241.68999],\n",
    -       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
    -       "         246.29   ],\n",
    -       "        ...,\n",
    -       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
    -       "         294.69   ],\n",
    -       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
    -       "         295.19   ],\n",
    -       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
    -       "         295.69   ]]], dtype=float32)\n",
    +       "[3869000 values with dtype=float32]\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    @@ -3322,66 +4162,26 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", - "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", - " 238.59999],\n", - " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", - " 239.29999],\n", - " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", - " 241.7 ],\n", - " ...,\n", - " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", - " 294.69998],\n", - " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", - " 295.19998],\n", - " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", - " 296.6 ]],\n", - "\n", - " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", - " 235.79999],\n", - " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", - " 235.7 ],\n", - " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", - " 238.5 ],\n", - "...\n", - " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", - " 294.29 ],\n", - " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", - " 294.38998],\n", - " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", - " 295.19 ]],\n", - "\n", - " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", - " 241.79 ],\n", - " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", - " 241.68999],\n", - " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", - " 246.29 ],\n", - " ...,\n", - " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", - " 294.69 ],\n", - " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", - " 295.19 ],\n", - " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", - " 295.69 ]]], dtype=float32)\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 
322.5 325.0 327.5 330.0\n", @@ -3455,18 +4215,18 @@ " var_desc: Air temperature" ] }, - "execution_count": 34, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air.as_numpy()" + "ds.air" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 14, "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, "outputs": [ @@ -3476,7 +4236,7 @@ "numpy.ndarray" ] }, - "execution_count": 35, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -3487,7 +4247,28 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 15, + "id": "615efd76-2194-4604-9ab8-61499e7d725d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy.ndarray" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds.air.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ @@ -3497,7 +4278,7 @@ "cupy.ndarray" ] }, - "execution_count": 36, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -3509,14 +4290,24 @@ { "cell_type": "markdown", "id": "cab539a7-d952-4b38-b515-712c52c62501", + "metadata": { + "tags": [] + }, + "source": [ + "## Doesn't work: Chunk with dask" + ] + }, + { + "cell_type": "markdown", + "id": "62c084eb-8df4-4b7f-a187-a736d68d430d", "metadata": {}, "source": [ - "## Chunk with dask" + "`meta` is wrong" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", "metadata": {}, "outputs": [ @@ -3886,31 +4677,24 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "dask.array<xarray-air, shape=(2920, 25, 53), dtype=float32, chunksize=(10, 25, 53), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
            "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Data variables:\n",
    -       "    air      (time, lat, lon) float32 dask.array<chunksize=(10, 25, 53), meta=np.ndarray>\n",
    -       "    scalar   float64 ...\n",
            "Attributes:\n",
    -       "    Conventions:  COARDS\n",
    -       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    -       "    platform:     Model\n",
    -       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    -       "    title:        4x daily NMC reanalysis (1948)
    " + " dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "\n", + "dask.array\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float32 dask.array\n", - " scalar float64 ...\n", "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.chunk(time=10).air" + ] + }, + { + "cell_type": "markdown", + "id": "3f4c72f6-22e7-4e99-9f4e-2524d6ab4226", + "metadata": {}, + "source": [ + "`dask.array.core.getter` calls `np.asarray` on each chunk.\n", + "\n", + "This calls `ImplicitToExplicitIndexingAdapter.__array__` which calls `np.asarray(cupy.array)` which raises.\n", + "\n", + "Xarray uses `.get_duck_array` internally to remove these adapters. 
We might need to add\n", + "```python\n", + "# handle xarray internal classes that might wrap cupy\n", + "if hasattr(c, \"get_duck_array\"):\n", + " c = c.get_duck_array()\n", + "else:\n", + " c = np.asarray(c)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e1256d03-9701-433a-8291-80dc8dccffce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" ] }, - "execution_count": 37, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.chunk(time=10)" + "from dask.utils import is_arraylike\n", + "\n", + "data = ds.air.variable._data\n", + "is_arraylike(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "308affa5-9fb9-4638-989b-97aac2604c16", + "metadata": {}, + "outputs": [], + "source": [ + "from xarray.core.indexing import ImplicitToExplicitIndexingAdapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "985cd2f8-406e-4e9e-8017-42efb16aa40e", + "metadata": {}, + "outputs": [], + "source": [ + "ImplicitToExplicitIndexingAdapter(data).get_duck_array()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8ef4f7-5014-476f-b4c0-ec2f9abdb6e2", + "metadata": {}, + "outputs": [], + "source": [ + "ds.chunk(time=10).air.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "17dc1bf6-7548-4eee-a5f3-ebcc20d41567", + "metadata": {}, + "source": [ + "### explicit meta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdd4b4e6-d69a-4898-964a-0e6096ca1942", + "metadata": {}, + "outputs": [], + "source": [ + "import cupy as cp\n", + "\n", + "chunked = ds.chunk(time=10, from_array_kwargs={\"meta\": cp.array([])})\n", + "chunked.air" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74f80d94-ebb6-43c3-9411-79e0442d894e", + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "\n", + "chunked.compute()" ] }, { @@ -4114,7 +5016,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.17" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From d7394ef7bdc941df7e418918ef2aabb7fea5839e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 18:10:33 +0000 Subject: [PATCH 09/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/kvikio.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 09c5394..6117eb2 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -58,6 +58,7 @@ "import zarr\n", "\n", "import dask\n", + "\n", "dask.config.set(scheduler=\"sync\")\n", "\n", "store = \"./air-temperature.zarr\"\n", From ca0cf45d1c2cca5820b0c1ee3f6813bd1519ff10 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 2 Nov 2023 21:49:07 -0600 Subject: [PATCH 10/30] Add tests --- cupy_xarray/kvikio.py | 12 ++++--- cupy_xarray/tests/test_kvikio.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 cupy_xarray/tests/test_kvikio.py diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 669978f..410dee4 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -20,6 +20,10 @@ has_kvikio = False +# TODO: minimum kvikio version for supporting consolidated +# TODO: minimum xarray version for ZarrArrayWrapper._array 2023.10.0? 
+ + class DummyZarrArrayWrapper(ZarrArrayWrapper): def __init__(self, array: np.ndarray): assert isinstance(array, np.ndarray) @@ -47,11 +51,12 @@ class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): """Used to wrap dimension coordinates.""" def __array__(self): - return self.datastore.zarr_group[self.variable_name][:].get() + return self._array[:].get() def get_array(self): # total hack: make a numpy array look like a Zarr array - return DummyZarrArrayWrapper(self.datastore.zarr_group[self.variable_name][:].get()) + # this gets us through Xarray's backend layers + return DummyZarrArrayWrapper(self._array[:].get()) class GDSZarrStore(ZarrStore): @@ -84,9 +89,6 @@ def open_group( ) open_kwargs["storage_options"] = storage_options - # TODO: handle consolidated - assert not consolidated - if chunk_store: open_kwargs["chunk_store"] = chunk_store if consolidated is None: diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py new file mode 100644 index 0000000..de98c09 --- /dev/null +++ b/cupy_xarray/tests/test_kvikio.py @@ -0,0 +1,54 @@ +import cupy as cp +import numpy as np +import pytest +import xarray as xr + +kvikio = pytest.importorskip("kvikio") +zarr = pytest.importorskip("zarr") + +import kvikio.zarr # noqa +import xarray.core.indexing # noqa +from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin + + +@pytest.fixture +def store(tmp_path): + ds = xr.Dataset( + { + "a": ("x", np.arange(10), {"foo": "bar"}), + "scalar": np.array(1), + }, + coords={"x": ("x", np.arange(-5, 5))}, + ) + + for var in ds.variables: + ds[var].encoding["compressor"] = None + + store_path = tmp_path / "kvikio.zarr" + ds.to_zarr(store_path, consolidated=True) + return store_path + + +def test_entrypoint(): + assert "kvikio" in xr.backends.list_engines() + + +@pytest.mark.parametrize("consolidated", [True, False]) +def test_lazy_load(consolidated, store): + with xr.open_dataset(store, engine="kvikio", consolidated=consolidated) as ds: + for _, da in 
ds.data_vars.items(): + assert isinstance(da.variable._data, ExplicitlyIndexedNDArrayMixin) + + +@pytest.mark.parametrize("indexer", [slice(None), slice(2, 4), 2, [2, 3, 5]]) +def test_lazy_indexing(indexer, store): + with xr.open_dataset(store, engine="kvikio") as ds: + ds = ds.isel(x=indexer) + for _, da in ds.data_vars.items(): + assert isinstance(da.variable._data, ExplicitlyIndexedNDArrayMixin) + + loaded = ds.compute() + for _, da in loaded.data_vars.items(): + if da.ndim == 0: + continue + assert isinstance(da.data, cp.ndarray) From 5d27b261c3880e0f93f78fba23ca2397fcf75819 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 21 Jun 2024 15:29:28 +1200 Subject: [PATCH 11/30] Move kvikio notebook under docs/source Allow it to be rendered under the User Guide section. --- docs/index.md | 1 + docs/{ => source}/kvikio.ipynb | 0 2 files changed, 1 insertion(+) rename docs/{ => source}/kvikio.ipynb (100%) diff --git a/docs/index.md b/docs/index.md index 3bbd9a0..6cc05e9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,6 +57,7 @@ Large parts of this documentations comes from [SciPy 2023 Xarray on GPUs tutoria source/high-level-api source/apply-ufunc source/real-example-1 + source/kvikio **Tutorials & Presentations**: diff --git a/docs/kvikio.ipynb b/docs/source/kvikio.ipynb similarity index 100% rename from docs/kvikio.ipynb rename to docs/source/kvikio.ipynb From 85491d70ccc40d6c954beb5ab28bd6f46ed57490 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 12:13:14 +1200 Subject: [PATCH 12/30] Add zarr as a dependency in ci/doc.yml Will need it for the kvikio.zarr docs later. --- ci/doc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/doc.yml b/ci/doc.yml index 983d20f..945f2a7 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -15,6 +15,7 @@ dependencies: - furo - myst-nb - xarray + - zarr - pip: # relative to this file. Needs to be editable to be accepted. 
- --editable .. From c470b975beffd5fd53ad563542b17d943ffae41e Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 15:56:34 +1200 Subject: [PATCH 13/30] Add entry for KvikioBackendEntrypoint in API docs Create new section in the API documentation page for the kvikIO engine. Added more docstrings to the kvikio.py file, and fixed some imports so things render nicely on the API page. Also added an intersphinx link to the kvikio docs at https://docs.rapids.ai/api/kvikio/stable. --- cupy_xarray/__init__.py | 3 ++- cupy_xarray/kvikio.py | 16 ++++++++++++++-- docs/api.rst | 13 +++++++++++++ docs/conf.py | 1 + 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/cupy_xarray/__init__.py b/cupy_xarray/__init__.py index 5c3a06c..0bb96aa 100644 --- a/cupy_xarray/__init__.py +++ b/cupy_xarray/__init__.py @@ -1,4 +1,5 @@ from . import _version -from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa +from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa: F401 +from .kvikio import KvikioBackendEntrypoint # noqa: F401 __version__ = _version.get_versions()["version"] diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 410dee4..871bc27 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -1,9 +1,13 @@ +""" +:doc:`kvikIO ` backend for xarray to read Zarr stores directly into CuPy +arrays in GPU memory. +""" + import os import warnings import cupy as cp import numpy as np -import zarr from xarray import Variable from xarray.backends import zarr as zarr_backend from xarray.backends.common import _normalize_path # TODO: can this be public @@ -14,6 +18,7 @@ try: import kvikio.zarr + import zarr has_kvikio = True except ImportError: @@ -165,9 +170,16 @@ def open_store_variable(self, name, zarr_array): class KvikioBackendEntrypoint(ZarrBackendEntrypoint): + """ + Xarray backend to read Zarr stores using 'kvikio' engine. 
+ + For more information about the underlying library, visit + :doc:`kvikIO's Zarr page`. + """ + available = has_kvikio description = "Open zarr files (.zarr) using Kvikio" - url = "https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr" + url = "https://docs.rapids.ai/api/kvikio/stable/api/#zarr" # disabled by default # We need to provide this because of the subclassing from diff --git a/docs/api.rst b/docs/api.rst index c1ad4c3..67c1cba 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -51,3 +51,16 @@ Methods Dataset.cupy.as_cupy Dataset.cupy.as_numpy + + +KvikIO engine +------------- + +.. currentmodule:: cupy_xarray + +.. automodule:: cupy_xarray.kvikio + +.. autosummary:: + :toctree: generated/ + + KvikioBackendEntrypoint diff --git a/docs/conf.py b/docs/conf.py index 1ba6a75..3e2d81b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -57,6 +57,7 @@ "python": ("https://docs.python.org/3/", None), "dask": ("https://docs.dask.org/en/latest", None), "cupy": ("https://docs.cupy.dev/en/latest", None), + "kvikio": ("https://docs.rapids.ai/api/kvikio/stable", None), "xarray": ("http://docs.xarray.dev/en/latest/", None), } From 95efa180be992ab877f22781ad240222d438af00 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 19:50:32 +1200 Subject: [PATCH 14/30] Fix input argument into CupyZarrArrayWrapper Fixes error like `TypeError: ZarrArrayWrapper.__init__() takes 2 positional arguments but 3 were given`. 
--- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 871bc27..078d1f3 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -152,7 +152,7 @@ def open_store_variable(self, name, zarr_array): array_wrapper = EagerCupyZarrArrayWrapper else: array_wrapper = CupyZarrArrayWrapper - data = indexing.LazilyIndexedArray(array_wrapper(name, self)) + data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes) encoding = { From ae2a7f1c205358d459a90eae364708f12b4bb918 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:37:35 +0000 Subject: [PATCH 15/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 078d1f3..fd6cb29 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -163,7 +163,7 @@ def open_store_variable(self, name, zarr_array): } # _FillValue needs to be in attributes, not encoding, so it will get # picked up by decode_cf - if getattr(zarr_array, "fill_value") is not None: + if zarr_array.fill_value is not None: attributes["_FillValue"] = zarr_array.fill_value return Variable(dimensions, data, attributes, encoding) From 15fbafd9410057b19015bc04f5c98883058961a7 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:06:07 -0500 Subject: [PATCH 16/30] Re-add kvikio backend entrypoint to pyproject.toml Fix improper merge handling on d684dadc160f997ae7a6c72c73c61839d257f002 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d98b3fe..2d5094e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ test = [ "pytest", ] 
+[project.entry-points."xarray.backends"] +kvikio = "cupy_xarray.kvikio:KvikioBackendEntrypoint" + [tool.ruff] line-length = 100 # E501 (line-too-long) exclude = [ From f3df115f2db7516cac43c9cb8d32ac817a3630cb Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:20:25 -0500 Subject: [PATCH 17/30] Fix C408 and E402 --- cupy_xarray/kvikio.py | 12 ++++++------ cupy_xarray/tests/test_kvikio.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index fd6cb29..f82a07f 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -85,13 +85,13 @@ def open_group( if isinstance(store, os.PathLike): store = os.fspath(store) - open_kwargs = dict( - mode=mode, - synchronizer=synchronizer, - path=group, + open_kwargs = { + "mode": mode, + "synchronizer": synchronizer, + "path": group, ########## NEW STUFF - meta_array=cp.empty(()), - ) + "meta_array": cp.empty(()), + } open_kwargs["storage_options"] = storage_options if chunk_store: diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py index de98c09..ba64fbb 100644 --- a/cupy_xarray/tests/test_kvikio.py +++ b/cupy_xarray/tests/test_kvikio.py @@ -2,13 +2,13 @@ import numpy as np import pytest import xarray as xr +from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin kvikio = pytest.importorskip("kvikio") zarr = pytest.importorskip("zarr") import kvikio.zarr # noqa import xarray.core.indexing # noqa -from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin @pytest.fixture From 4e1857ac88b10062d8ae9aa95a112b34c3b29a84 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:26:02 -0500 Subject: [PATCH 18/30] Use get_duck_array instead of get_array Fix `TypeError: Implicit conversion to a NumPy array is not allowed. 
Please use `.get()` to construct a NumPy array explicitly` on https://github.com/pydata/xarray/blob/v2024.11.0/xarray/core/indexing.py#L578 --- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index f82a07f..aa2e846 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -58,7 +58,7 @@ class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): def __array__(self): return self._array[:].get() - def get_array(self): + def get_duck_array(self): # total hack: make a numpy array look like a Zarr array # this gets us through Xarray's backend layers return DummyZarrArrayWrapper(self._array[:].get()) From 7345b617092a24f9af81f6cbfcbcca5cd63ad2fc Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:24:14 +1300 Subject: [PATCH 19/30] Fix SIM108 Use ternary operator --- cupy_xarray/kvikio.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index aa2e846..e5b7c6d 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -143,15 +143,14 @@ def open_store_variable(self, name, zarr_array): ) #### Changed from zarr array wrapper - if name in dimensions: - # we want indexed dimensions to be loaded eagerly - # Right now we load in to device and then transfer to host - # But these should be small-ish arrays - # TODO: can we tell GDSStore to load as numpy array directly - # not cupy array? - array_wrapper = EagerCupyZarrArrayWrapper - else: - array_wrapper = CupyZarrArrayWrapper + # we want indexed dimensions to be loaded eagerly + # Right now we load in to device and then transfer to host + # But these should be small-ish arrays + # TODO: can we tell GDSStore to load as numpy array directly + # not cupy array? 
+ array_wrapper = ( + EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper + ) data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes) From e2b410e9c992c4901a43f69e4fe5b582862016b0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 22:24:47 +0000 Subject: [PATCH 20/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cupy_xarray/kvikio.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e5b7c6d..3004ef1 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -148,9 +148,7 @@ def open_store_variable(self, name, zarr_array): # But these should be small-ish arrays # TODO: can we tell GDSStore to load as numpy array directly # not cupy array? - array_wrapper = ( - EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper - ) + array_wrapper = EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes) From 7dd78e9b68de8baf531bf33f29510028318d0803 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 11:50:42 +1300 Subject: [PATCH 21/30] Install nightly version of kvikio=25.04.00a and zarr>=3.0.5 Need patches from https://github.com/rapidsai/kvikio/pull/646 and https://github.com/zarr-developers/zarr-python/pull/2751. 
--- ci/doc.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/doc.yml b/ci/doc.yml index e4e46d6..e534502 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - cupy-core + - rapidsai-nightly::kvikio>=25.04.00a - pip - python=3.10 - sphinx @@ -15,7 +16,7 @@ dependencies: - furo>=2024.8.6 - myst-nb - xarray - - zarr + - zarr>=3.0.3 - pip: # relative to this file. Needs to be editable to be accepted. - --editable .. From cb77678df14ecedbf773f5a479a433d8aba6dbc2 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 11:57:02 +1300 Subject: [PATCH 22/30] Remove custom open_store_variable method from GDSZarrStore class Can directly rely on upstream xarray's ZarrStore.open_store_variable method since Zarr v3 compatibility was added in https://github.com/pydata/xarray/pull/9552. --- cupy_xarray/kvikio.py | 70 +------------------------------------------ 1 file changed, 1 insertion(+), 69 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 3004ef1..f4555ca 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -7,13 +7,9 @@ import warnings import cupy as cp -import numpy as np -from xarray import Variable -from xarray.backends import zarr as zarr_backend from xarray.backends.common import _normalize_path # TODO: can this be public from xarray.backends.store import StoreBackendEntrypoint -from xarray.backends.zarr import ZarrArrayWrapper, ZarrBackendEntrypoint, ZarrStore -from xarray.core import indexing +from xarray.backends.zarr import ZarrBackendEntrypoint, ZarrStore from xarray.core.utils import close_on_error # TODO: can this be public. try: @@ -29,41 +25,6 @@ # TODO: minimum xarray version for ZarrArrayWrapper._array 2023.10.0? 
-class DummyZarrArrayWrapper(ZarrArrayWrapper): - def __init__(self, array: np.ndarray): - assert isinstance(array, np.ndarray) - self._array = array - self.filters = None - self.dtype = array.dtype - self.shape = array.shape - - def __array__(self): - return self._array - - def get_array(self): - return self._array - - def __getitem__(self, key): - return self._array[key] - - -class CupyZarrArrayWrapper(ZarrArrayWrapper): - def __array__(self): - return self.get_array() - - -class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): - """Used to wrap dimension coordinates.""" - - def __array__(self): - return self._array[:].get() - - def get_duck_array(self): - # total hack: make a numpy array look like a Zarr array - # this gets us through Xarray's backend layers - return DummyZarrArrayWrapper(self._array[:].get()) - - class GDSZarrStore(ZarrStore): @classmethod def open_group( @@ -136,35 +97,6 @@ def open_group( safe_chunks, ) - def open_store_variable(self, name, zarr_array): - try_nczarr = self._mode == "r" - dimensions, attributes = zarr_backend._get_zarr_dims_and_attrs( - zarr_array, zarr_backend.DIMENSION_KEY, try_nczarr - ) - - #### Changed from zarr array wrapper - # we want indexed dimensions to be loaded eagerly - # Right now we load in to device and then transfer to host - # But these should be small-ish arrays - # TODO: can we tell GDSStore to load as numpy array directly - # not cupy array? 
- array_wrapper = EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper - data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) - - attributes = dict(attributes) - encoding = { - "chunks": zarr_array.chunks, - "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)), - "compressor": zarr_array.compressor, - "filters": zarr_array.filters, - } - # _FillValue needs to be in attributes, not encoding, so it will get - # picked up by decode_cf - if zarr_array.fill_value is not None: - attributes["_FillValue"] = zarr_array.fill_value - - return Variable(dimensions, data, attributes, encoding) - class KvikioBackendEntrypoint(ZarrBackendEntrypoint): """ From 026215148c107b4f0277a8a452c2af698fb9626e Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:00:04 +1300 Subject: [PATCH 23/30] Fix UserWarning compressor -> compressors UserWarning: The `compressor` argument is deprecated. Use `compressors` instead. --- cupy_xarray/tests/test_kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py index ba64fbb..58e56c5 100644 --- a/cupy_xarray/tests/test_kvikio.py +++ b/cupy_xarray/tests/test_kvikio.py @@ -22,7 +22,7 @@ def store(tmp_path): ) for var in ds.variables: - ds[var].encoding["compressor"] = None + ds[var].encoding["compressors"] = None store_path = tmp_path / "kvikio.zarr" ds.to_zarr(store_path, consolidated=True) From e26ed24775e93f148629c16a10af51e2359b5b50 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:21:55 +1300 Subject: [PATCH 24/30] Reuse logic from xarray.backends.zarr.ZarrStore.open_group Only difference is to pass the Zarr store's root filepath to kvikio.zarr.GDSStore instead of xarray.backends.zarr.ZarrStore. 
--- cupy_xarray/kvikio.py | 114 +++++++++--------------------------------- 1 file changed, 23 insertions(+), 91 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index f4555ca..01c2f8d 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -3,18 +3,14 @@ arrays in GPU memory. """ -import os -import warnings - -import cupy as cp from xarray.backends.common import _normalize_path # TODO: can this be public from xarray.backends.store import StoreBackendEntrypoint from xarray.backends.zarr import ZarrBackendEntrypoint, ZarrStore +from xarray.core.dataset import Dataset from xarray.core.utils import close_on_error # TODO: can this be public. try: import kvikio.zarr - import zarr has_kvikio = True except ImportError: @@ -25,79 +21,6 @@ # TODO: minimum xarray version for ZarrArrayWrapper._array 2023.10.0? -class GDSZarrStore(ZarrStore): - @classmethod - def open_group( - cls, - store, - mode="r", - synchronizer=None, - group=None, - consolidated=False, - consolidate_on_close=False, - chunk_store=None, - storage_options=None, - append_dim=None, - write_region=None, - safe_chunks=True, - stacklevel=2, - ): - # zarr doesn't support pathlib.Path objects yet. zarr-python#601 - if isinstance(store, os.PathLike): - store = os.fspath(store) - - open_kwargs = { - "mode": mode, - "synchronizer": synchronizer, - "path": group, - ########## NEW STUFF - "meta_array": cp.empty(()), - } - open_kwargs["storage_options"] = storage_options - - if chunk_store: - open_kwargs["chunk_store"] = chunk_store - if consolidated is None: - consolidated = False - - store = kvikio.zarr.GDSStore(store) - - if consolidated is None: - try: - zarr_group = zarr.open_consolidated(store, **open_kwargs) - except KeyError: - warnings.warn( - "Failed to open Zarr store with consolidated metadata, " - "falling back to try reading non-consolidated metadata. " - "This is typically much slower for opening a dataset. " - "To silence this warning, consider:\n" - "1. 
Consolidating metadata in this existing store with " - "zarr.consolidate_metadata().\n" - "2. Explicitly setting consolidated=False, to avoid trying " - "to read consolidate metadata, or\n" - "3. Explicitly setting consolidated=True, to raise an " - "error in this case instead of falling back to try " - "reading non-consolidated metadata.", - RuntimeWarning, - stacklevel=stacklevel, - ) - zarr_group = zarr.open_group(store, **open_kwargs) - elif consolidated: - # TODO: an option to pass the metadata_key keyword - zarr_group = zarr.open_consolidated(store, **open_kwargs) - else: - zarr_group = zarr.open_group(store, **open_kwargs) - - return cls( - zarr_group, - mode, - consolidate_on_close, - append_dim, - write_region, - safe_chunks, - ) - - class KvikioBackendEntrypoint(ZarrBackendEntrypoint): """ Xarray backend to read Zarr stores using 'kvikio' engine. @@ -132,20 +55,29 @@ def open_dataset( consolidated=None, chunk_store=None, storage_options=None, - stacklevel=3, - ): + zarr_version=None, + zarr_format=None, + store=None, + engine=None, + use_zarr_fill_value_as_mask=None, + cache_members: bool = True, + ) -> Dataset: filename_or_obj = _normalize_path(filename_or_obj) - store = GDSZarrStore.open_group( - filename_or_obj, - group=group, - mode=mode, - synchronizer=synchronizer, - consolidated=consolidated, - consolidate_on_close=False, - chunk_store=chunk_store, - storage_options=storage_options, - stacklevel=stacklevel + 1, - ) + if not store: + store = ZarrStore.open_group( + store=kvikio.zarr.GDSStore(root=filename_or_obj), + group=group, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=False, + chunk_store=chunk_store, + storage_options=storage_options, + zarr_version=zarr_version, + use_zarr_fill_value_as_mask=None, + zarr_format=zarr_format, + cache_members=cache_members, + ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): From f185b448919b0850a1e0754621a77604701e0c32 Mon Sep 17 00:00:00 
2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:39:00 +1300 Subject: [PATCH 25/30] Install xarray=2025.1.3.dev22+g0184702f Dev version containing patch at https://github.com/pydata/xarray/pull/10078 that fixes `TypeError: NumpyIndexingAdapter only wraps np.ndarray. Trying to wrap `. --- ci/doc.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/doc.yml b/ci/doc.yml index e534502..0e068d8 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -5,7 +5,7 @@ dependencies: - cupy-core - rapidsai-nightly::kvikio>=25.04.00a - pip - - python=3.10 + - python=3.11 - sphinx - sphinx-design - sphinx-copybutton @@ -15,8 +15,8 @@ dependencies: - ipywidgets - furo>=2024.8.6 - myst-nb - - xarray - zarr>=3.0.3 - pip: + - xarray @ git+https://github.com/pydata/xarray.git@0184702f16c3f744fc9096c7dac690626dcc6922 #https://github.com/pydata/xarray/pull/10078 # relative to this file. Needs to be editable to be accepted. - --editable .. From 1a52ce5161ec6d6e7787a34bd4ed16c16a24f768 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:04:25 +1300 Subject: [PATCH 26/30] Add zarr.config.enable_gpu() context manager to test_lazy_indexing Needed to return cupy.ndarray instead of numpy.ndarray objects. Should find a better place to put this `zarr.config.enable_gpu()` call later. 
--- cupy_xarray/tests/test_kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py index 58e56c5..b6fe67f 100644 --- a/cupy_xarray/tests/test_kvikio.py +++ b/cupy_xarray/tests/test_kvikio.py @@ -42,7 +42,7 @@ def test_lazy_load(consolidated, store): @pytest.mark.parametrize("indexer", [slice(None), slice(2, 4), 2, [2, 3, 5]]) def test_lazy_indexing(indexer, store): - with xr.open_dataset(store, engine="kvikio") as ds: + with zarr.config.enable_gpu(), xr.open_dataset(store, engine="kvikio") as ds: ds = ds.isel(x=indexer) for _, da in ds.data_vars.items(): assert isinstance(da.variable._data, ExplicitlyIndexedNDArrayMixin) From 789a9b6f4019ffc6aab99c2410d557400a6db8d6 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:45:35 +1300 Subject: [PATCH 27/30] Refresh kvikIO demo notebook Rearranged some cells so the air-temperature.zarr store is created first. Added a couple of `zarr.config.enable_gpu()` statements in to ensure arrays are read to cupy.ndarray. Removed the flox section at the end. --- cupy_xarray/kvikio.py | 4 - docs/source/kvikio.ipynb | 2036 ++++++++++++++++++++++---------------- 2 files changed, 1169 insertions(+), 871 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 01c2f8d..2ee9331 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -17,10 +17,6 @@ has_kvikio = False -# TODO: minimum kvikio version for supporting consolidated -# TODO: minimum xarray version for ZarrArrayWrapper._array 2023.10.0? - - class KvikioBackendEntrypoint(ZarrBackendEntrypoint): """ Xarray backend to read Zarr stores using 'kvikio' engine. 
diff --git a/docs/source/kvikio.ipynb b/docs/source/kvikio.ipynb index 6117eb2..0a9dd45 100644 --- a/docs/source/kvikio.ipynb +++ b/docs/source/kvikio.ipynb @@ -8,8 +8,8 @@ "# Kvikio demo\n", "\n", "Requires\n", - "- [ ] https://github.com/pydata/xarray/pull/8100\n", - "- [ ] Some updates to `dask.array.core.getter`" + "- [ ] https://github.com/pydata/xarray/pull/10078\n", + "- [ ] https://github.com/rapidsai/kvikio/pull/646" ] }, { @@ -25,17 +25,11 @@ "output_type": "stream", "text": [ "Exception reporting mode: Minimal\n", - "kvikio : 23.2.0\n", - "xarray : 2022.6.1.dev458+g83c2919b2\n", - "numpy_groupies: 0.9.22+2.gd148074\n", - "json : 2.0.9\n", - "numpy : 1.24.4\n", - "flox : 0.7.3.dev12+g796dcd2\n", - "zarr : 2.16.1\n", - "dask : 2023.8.1\n", - "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", - "sys : 3.9.17 | packaged by conda-forge | (main, Aug 10 2023, 07:02:31) \n", - "[GCC 12.3.0]\n", + "numpy : 2.2.3\n", + "zarr : 3.0.5\n", + "cupy_xarray: 0.1.4+36.ge26ed24.dirty\n", + "kvikio : 25.4.0\n", + "xarray : 2025.1.3.dev22+g0184702f\n", "\n" ] } @@ -44,25 +38,13 @@ "%load_ext watermark\n", "%xmode minimal\n", "\n", - "# These imports are currently unnecessary. 
I import them to show versions\n", - "# cupy_xarray registers the kvikio entrypoint on install.\n", - "# import cupy as cp\n", - "# import cudf\n", "import cupy_xarray # registers cupy accessor\n", "import kvikio.zarr\n", "\n", - "import flox\n", - "import numpy_groupies\n", "import numpy as np\n", "import xarray as xr\n", "import zarr\n", "\n", - "import dask\n", - "\n", - "dask.config.set(scheduler=\"sync\")\n", - "\n", - "store = \"./air-temperature.zarr\"\n", - "\n", "%watermark -iv" ] }, @@ -75,9 +57,12 @@ { "data": { "text/plain": [ - "{'kvikio': \n", + "{'netcdf4': \n", + " Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray\n", + " Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html,\n", + " 'kvikio': \n", " Open zarr files (.zarr) using Kvikio\n", - " Learn more at https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr,\n", + " Learn more at https://docs.rapids.ai/api/kvikio/stable/api/#zarr,\n", " 'store': \n", " Open AbstractDataStore instances in Xarray\n", " Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html,\n", @@ -95,53 +80,96 @@ "xr.backends.list_engines()" ] }, + { + "cell_type": "markdown", + "id": "5f12848d-a5ec-4cea-9a49-4f2bcefd9114", + "metadata": { + "tags": [] + }, + "source": [ + "## Create example dataset\n", + "\n", + "- cannot be compressed" + ] + }, { "cell_type": "code", "execution_count": 3, - "id": "81b2e5cb-4b2d-4a31-b7a0-961aadbc321d", - "metadata": {}, + "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", + "metadata": { + "tags": [] + }, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m 
\u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\n" + "/home/user/mambaforge/envs/cupy-xarray-doc/lib/python3.11/site-packages/xarray/core/dataset.py:2503: SerializationWarning: saving variable None with floating point data as an integer dtype without any _FillValue to use for NaNs\n", + " return to_zarr( # type: ignore[call-overload,misc]\n" ] }, { - "name": "stdin", - "output_type": "stream", - "text": [ - "ipdb> c\n" - ] - }, + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "store = \"/tmp/air-temperature.zarr\"\n", + "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", + "for var in airt.variables:\n", + " airt[var].encoding[\"compressors\"] = None\n", + "airt[\"scalar\"] = 12.0\n", + "airt.to_zarr(store, mode=\"w\", zarr_format=3, consolidated=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a3d0ec7-22fb-4558-8e60-9627266e3111", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "883d5507-988f-453a-b576-87bb563b540f", + "metadata": { + "tags": [] + }, + "source": [ + "## Test opening\n", + "\n", + "### Standard usage" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": 
"4a9ba63c-0b29-4eb8-9171-965b90071496", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0m\n" - ] - }, - { - "name": "stdin", - "output_type": "stream", - "text": [ - "ipdb> c\n" + "\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + "/tmp/ipykernel_72617/982297347.py:1: RuntimeWarning: Failed to open Zarr store with consolidated metadata, but successfully read with non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:\n", + "1. 
Consolidating metadata in this existing store with zarr.consolidate_metadata().\n", + "2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or\n", + "3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.\n", + " ds_cpu = xr.open_dataset(store, engine=\"zarr\")\n" ] }, { @@ -177,13 +205,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -228,7 +257,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -236,7 +265,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -248,6 +279,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -269,7 +304,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -280,7 +315,7 @@ "}\n", 
"\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -352,15 +387,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -510,40 +545,91 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "array([[[241.2 , 242.5 , ..., 235.5 , 238.6 ],\n",
    +       "        [243.8 , 244.5 , ..., 235.3 , 239.3 ],\n",
    +       "        ...,\n",
    +       "        [295.9 , 296.2 , ..., 295.9 , 295.2 ],\n",
    +       "        [296.29, 296.79, ..., 296.79, 296.6 ]],\n",
    +       "\n",
    +       "       [[242.1 , 242.7 , ..., 233.6 , 235.8 ],\n",
    +       "        [243.6 , 244.1 , ..., 232.5 , 235.7 ],\n",
    +       "        ...,\n",
    +       "        [296.2 , 296.7 , ..., 295.5 , 295.1 ],\n",
    +       "        [296.29, 297.2 , ..., 296.4 , 296.6 ]],\n",
    +       "\n",
    +       "       ...,\n",
    +       "\n",
    +       "       [[245.79, 244.79, ..., 243.99, 244.79],\n",
    +       "        [249.89, 249.29, ..., 242.49, 244.29],\n",
    +       "        ...,\n",
    +       "        [296.29, 297.19, ..., 295.09, 294.39],\n",
    +       "        [297.79, 298.39, ..., 295.49, 295.19]],\n",
    +       "\n",
    +       "       [[245.09, 244.29, ..., 241.49, 241.79],\n",
    +       "        [249.89, 249.29, ..., 240.29, 241.69],\n",
    +       "        ...,\n",
    +       "        [296.09, 296.89, ..., 295.69, 295.19],\n",
    +       "        [297.69, 298.09, ..., 296.19, 295.69]]], shape=(2920, 25, 53))\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Data variables:\n",
    -       "    air      (time, lat, lon) float32 ...\n",
    -       "    scalar   float64 ...\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
            "Attributes:\n",
    -       "    Conventions:  COARDS\n",
    -       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    -       "    platform:     Model\n",
    -       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    -       "    title:        4x daily NMC reanalysis (1948)
  • long_name :
    4xDaily Air temperature at sigma level 995
    units :
    degK
    precision :
    2
    GRIB_id :
    11
    GRIB_name :
    TMP
    var_desc :
    Air temperature
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    statistic :
    Individual Obs
    parent_stat :
    Other
    actual_range :
    [185.16000366210938, 322.1000061035156]
  • " ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + " Size: 31MB\n", + "array([[[241.2 , 242.5 , ..., 235.5 , 238.6 ],\n", + " [243.8 , 244.5 , ..., 235.3 , 239.3 ],\n", + " ...,\n", + " [295.9 , 296.2 , ..., 295.9 , 295.2 ],\n", + " [296.29, 296.79, ..., 296.79, 296.6 ]],\n", + "\n", + " [[242.1 , 242.7 , ..., 233.6 , 235.8 ],\n", + " [243.6 , 244.1 , ..., 232.5 , 235.7 ],\n", + " ...,\n", + " [296.2 , 296.7 , ..., 295.5 , 295.1 ],\n", + " [296.29, 297.2 , ..., 296.4 , 296.6 ]],\n", + "\n", + " ...,\n", + "\n", + " [[245.79, 244.79, ..., 243.99, 244.79],\n", + " [249.89, 249.29, ..., 242.49, 244.29],\n", + " ...,\n", + " [296.29, 297.19, ..., 295.09, 294.39],\n", + " [297.79, 298.39, ..., 295.49, 295.19]],\n", + "\n", + " [[245.09, 244.29, ..., 241.49, 241.79],\n", + " [249.89, 249.29, ..., 240.29, 241.69],\n", + " ...,\n", + " [296.09, 296.89, ..., 295.69, 295.19],\n", + " [297.69, 298.09, ..., 296.19, 295.69]]], shape=(2920, 25, 53))\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float32 ...\n", - " scalar float64 ...\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). 
These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " var_desc: Air temperature\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " statistic: Individual Obs\n", + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%autoreload\n", - "\n", - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "print(ds.air._variable._data)\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Create example dataset\n", - "\n", - "- cannot be compressed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", - "for var in airt.variables:\n", - " airt[var].encoding[\"compressor\"] = None\n", - "airt[\"scalar\"] = 12.0\n", - "airt.to_zarr(store, mode=\"w\", consolidated=True)" + "ds_cpu = xr.open_dataset(store, engine=\"zarr\")\n", + "print(ds_cpu.air.data.__class__)\n", + "ds_cpu.air" ] }, { "cell_type": "markdown", - "id": "883d5507-988f-453a-b576-87bb563b540f", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, + "id": "95161182-6b58-4dbd-9752-9961c251be1a", + "metadata": {}, "source": [ - "## Test opening\n", + "### Now with kvikio!\n", "\n", - "### Standard usage" + " - must read with `consolidated=False` 
(https://github.com/rapidsai/kvikio/issues/119)\n", + " - dask.from_zarr to GDSStore / open_mfdataset" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "execution_count": 5, + "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float64')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + ] + }, { "data": { "text/html": [ @@ -670,13 +755,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -721,7 +807,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -729,7 +815,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -741,6 +829,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + 
".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -762,7 +854,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -773,7 +865,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -845,15 +937,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1003,43 +1095,34 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.Dataset> Size: 31MB\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
    +       "Data variables:\n",
    +       "    scalar   float64 8B ...\n",
    +       "    air      (time, lat, lon) float64 31MB ...\n",
            "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • lon
    PandasIndex
    PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
    +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
    +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
    +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
    +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
    +       "       325.0, 327.5, 330.0],\n",
    +       "      dtype='float32', name='lon'))
  • Conventions :
    COARDS
    title :
    4x daily NMC reanalysis (1948)
    description :
    Data is from NMC initialized reanalysis\n", + "(4x/day). These are the 0.9950 sigma level values.
    platform :
    Model
    references :
    http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html
  • " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + " Size: 31MB\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + "Data variables:\n", + " scalar float64 8B ...\n", + " air (time, lat, lon) float64 31MB ...\n", "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." 
] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "xr.open_dataset(store, engine=\"zarr\").air" - ] - }, - { - "cell_type": "markdown", - "id": "95161182-6b58-4dbd-9752-9961c251be1a", - "metadata": {}, - "source": [ - "### Now with kvikio!\n", - "\n", - " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", - " - dask.from_zarr to GDSStore / open_mfdataset" + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", + "execution_count": 6, + "id": "6c939a04-1588-4693-9483-c6ad7152951a", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" - ] - }, { "data": { "text/html": [ @@ -1139,13 +1211,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -1190,7 +1263,7 @@ 
".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -1198,7 +1271,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1210,6 +1285,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -1231,7 +1310,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1242,7 +1321,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1314,15 +1393,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1472,87 +1551,37 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    -       "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Data variables:\n",
    -       "    air      (time, lat, lon) float32 ...\n",
    -       "    scalar   float64 ...\n",
    -       "Attributes:\n",
    -       "    Conventions:  COARDS\n",
    -       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    -       "    platform:     Model\n",
    -       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    -       "    title:        4x daily NMC reanalysis (1948)
    " + "
    <xarray.DataArray 'scalar' ()> Size: 8B\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float32 ...\n", - " scalar float64 ...\n", - "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " Size: 8B\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "print(ds.air._variable._data)\n", - "ds" + "ds.scalar" + ] + }, + { + "cell_type": "markdown", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": { + "tags": [] + }, + "source": [ + "## Lazy reading" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "6c939a04-1588-4693-9483-c6ad7152951a", + "execution_count": 7, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", "metadata": {}, "outputs": [ { @@ -1588,13 +1617,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " 
--xr-background-color-row-even: #111111;\n", @@ -1639,7 +1669,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -1647,7 +1677,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -1659,6 +1691,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -1680,7 +1716,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -1691,7 +1727,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -1763,15 +1799,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -1921,38 +1957,99 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'scalar' ()>\n",
    -       "[1 values with dtype=float64]
    " + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "[3869000 values with dtype=float64]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
    +       "Attributes:\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    units:         degK\n",
    +       "    precision:     2\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    var_desc:      Air temperature\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    statistic:     Individual Obs\n",
    +       "    parent_stat:   Other\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]
    " ], "text/plain": [ - "\n", - "[1 values with dtype=float64]" + " Size: 31MB\n", + "[3869000 values with dtype=float64]\n", + "Coordinates:\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " var_desc: Air temperature\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " statistic: Individual Obs\n", + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.scalar" + "ds.air" ] }, { "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Lazy reading" + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "execution_count": 8, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -1988,13 +2085,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ 
-2039,7 +2137,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -2047,7 +2145,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -2059,6 +2159,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -2080,7 +2184,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -2091,7 +2195,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -2163,15 +2267,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2321,100 +2425,88 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.DataArray 'air' (lon: 53)> Size: 424B\n",
    +       "array([277.29, 277.4 , 277.79, 278.6 , 279.5 , 280.1 , 280.6 , 280.9 ,\n",
    +       "       280.79, 280.7 , 280.79, 281.  , 280.29, 277.7 , 273.5 , 269.  ,\n",
    +       "       265.5 , 264.  , 265.2 , 268.1 , 269.79, 267.9 , 263.  , 258.1 ,\n",
    +       "       254.6 , 251.8 , 249.6 , 249.89, 252.3 , 254.  , 254.3 , 255.89,\n",
    +       "       260.  , 263.  , 261.5 , 257.29, 255.5 , 258.29, 264.  , 268.7 ,\n",
    +       "       270.5 , 270.6 , 271.2 , 272.9 , 274.79, 276.4 , 278.2 , 280.5 ,\n",
    +       "       282.9 , 284.7 , 286.1 , 286.9 , 286.6 ])\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "    lat      float32 4B 50.0\n",
    +       "    time     datetime64[ns] 8B 2013-01-01\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
            "Attributes:\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    units:         degK\n",
    +       "    precision:     2\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    var_desc:      Air temperature\n",
            "    dataset:       NMC Reanalysis\n",
            "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
            "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • long_name :
    4xDaily Air temperature at sigma level 995
    units :
    degK
    precision :
    2
    GRIB_id :
    11
    GRIB_name :
    TMP
    var_desc :
    Air temperature
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    statistic :
    Individual Obs
    parent_stat :
    Other
    actual_range :
    [185.16000366210938, 322.1000061035156]
  • " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + " Size: 424B\n", + "array([277.29, 277.4 , 277.79, 278.6 , 279.5 , 280.1 , 280.6 , 280.9 ,\n", + " 280.79, 280.7 , 280.79, 281. , 280.29, 277.7 , 273.5 , 269. ,\n", + " 265.5 , 264. , 265.2 , 268.1 , 269.79, 267.9 , 263. , 258.1 ,\n", + " 254.6 , 251.8 , 249.6 , 249.89, 252.3 , 254. , 254.3 , 255.89,\n", + " 260. , 263. , 261.5 , 257.29, 255.5 , 258.29, 264. , 268.7 ,\n", + " 270.5 , 270.6 , 271.2 , 272.9 , 274.79, 276.4 , 278.2 , 280.5 ,\n", + " 282.9 , 284.7 , 286.1 , 286.9 , 286.6 ])\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " lat float32 4B 50.0\n", + " time datetime64[ns] 8B 2013-01-01\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", + " var_desc: Air temperature\n", " dataset: NMC Reanalysis\n", " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" - ] - }, - { - "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Data load for repr" + "ds[\"air\"].isel(time=0, lat=10).load()" ] }, { "cell_type": "code", - "execution_count": 8, - "id": 
"00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 9, + "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", "metadata": {}, "outputs": [ { @@ -2450,13 +2542,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -2501,7 +2594,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -2509,7 +2602,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -2521,6 +2616,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -2542,7 +2641,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -2553,7 +2652,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -2625,15 +2724,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", 
- " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -2783,97 +2882,27 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (lon: 53)>\n",
    -       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    -       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    -       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    -       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    -       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    -       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    -       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    -       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    -       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    -       "      dtype=float32)\n",
    -       "Coordinates:\n",
    -       "    lat      float32 50.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "    time     datetime64[ns] 2013-01-01\n",
    -       "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + "
    <xarray.DataArray 'scalar' ()> Size: 8B\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", - " dtype=float32)\n", - "Coordinates:\n", - " lat float32 50.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " time datetime64[ns] 2013-01-01\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " Size: 8B\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10).load()" + "ds.scalar" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", + "execution_count": 10, + "id": "ba48a2c0-96e0-41d7-9e07-381e05e8dc33", "metadata": {}, "outputs": [ { @@ -2909,13 +2938,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 
0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -2960,7 +2990,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -2968,7 +2998,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -2980,6 +3012,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -3001,7 +3037,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -3012,7 +3048,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -3084,15 +3120,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -3242,27 +3278,174 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'scalar' ()>\n",
    -       "[1 values with dtype=float64]
    " + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "[3869000 values with dtype=float64]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
    +       "Attributes:\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    units:         degK\n",
    +       "    precision:     2\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    var_desc:      Air temperature\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    statistic:     Individual Obs\n",
    +       "    parent_stat:   Other\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]
    " ], "text/plain": [ - "\n", - "[1 values with dtype=float64]" + " Size: 31MB\n", + "[3869000 values with dtype=float64]\n", + "Coordinates:\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " var_desc: Air temperature\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " statistic: Individual Obs\n", + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.scalar" + "ds.air" ] }, { - "cell_type": "code", - "execution_count": 10, - "id": "ba48a2c0-96e0-41d7-9e07-381e05e8dc33", + "cell_type": "markdown", + "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", + "metadata": { + "tags": [] + }, + "source": [ + "## CuPy array on load\n", + "\n", + "Configure Zarr to use GPU memory by setting `zarr.config.enable_gpu()`.\n", + "\n", + "See https://zarr.readthedocs.io/en/stable/user-guide/gpu.html#using-gpus-with-zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float64')), key=BasicIndexer((0, 10, slice(None, None, None))))))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "ds[\"air\"].isel(time=0, lat=10).variable._data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "db69559c-1fde-4b3b-914d-87d8437ec256", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "with zarr.config.enable_gpu():\n", + " print(type(ds[\"air\"].isel(time=0, lat=10).load().data))" + ] + }, + { + "cell_type": "markdown", + "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "metadata": { + "tags": [] + }, + "source": [ + "## Load to host" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "84094bc6-7884-414a-89cf-4526c3a54aea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zarr.config.enable_gpu()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "09b40d7d-ed38-4a50-af11-c2e5f0242a97", "metadata": {}, "outputs": [ { @@ -3298,13 +3481,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -3349,7 +3533,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -3357,7 +3541,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " 
display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -3369,6 +3555,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -3390,7 +3580,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -3401,7 +3591,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -3473,15 +3663,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -3631,43 +3821,37 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "[3869000 values with dtype=float64]\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
            "Attributes:\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    units:         degK\n",
    +       "    precision:     2\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    var_desc:      Air temperature\n",
            "    dataset:       NMC Reanalysis\n",
            "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
            "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • lon
    PandasIndex
    PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
    +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
    +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
    +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
    +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
    +       "       325.0, 327.5, 330.0],\n",
    +       "      dtype='float32', name='lon'))
  • long_name :
    4xDaily Air temperature at sigma level 995
    units :
    degK
    precision :
    2
    GRIB_id :
    11
    GRIB_name :
    TMP
    var_desc :
    Air temperature
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    statistic :
    Individual Obs
    parent_stat :
    Other
    actual_range :
    [185.16000366210938, 322.1000061035156]
  • " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + " Size: 31MB\n", + "[3869000 values with dtype=float64]\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", + " var_desc: Air temperature\n", " dataset: NMC Reanalysis\n", " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -3711,41 +3901,48 @@ ] }, { - "cell_type": "markdown", - "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, + "cell_type": "code", + "execution_count": 15, + "id": "615efd76-2194-4604-9ab8-61499e7d725d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ - "## CuPy array on load" + "print(type(ds[\"air\"].data))" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", + "execution_count": 16, + "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, "outputs": [ { "data": 
{ "text/plain": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((0, 10, slice(None, None, None))))))" + "numpy.ndarray" ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10).variable._data" + "type(ds.air.as_numpy().data)" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "db69559c-1fde-4b3b-914d-87d8437ec256", + "execution_count": 17, + "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ { @@ -3754,30 +3951,37 @@ "cupy.ndarray" ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "type(ds[\"air\"].isel(time=0, lat=10).load().data)" + "type(ds.air.mean(\"time\").load().data)" ] }, { "cell_type": "markdown", - "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "id": "cab539a7-d952-4b38-b515-712c52c62501", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Load to host" + "## Doesn't work: Chunk with dask" + ] + }, + { + "cell_type": "markdown", + "id": "62c084eb-8df4-4b7f-a187-a736d68d430d", + "metadata": {}, + "source": [ + "`meta` is wrong" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "09b40d7d-ed38-4a50-af11-c2e5f0242a97", + "execution_count": 18, + "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", "metadata": {}, "outputs": [ { @@ -3813,13 +4017,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: 
rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -3864,7 +4069,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -3872,7 +4077,9 @@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -3884,6 +4091,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -3905,7 +4116,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -3916,7 +4127,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -3988,15 +4199,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -4146,170 +4357,353 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "dask.array<xarray-air, shape=(2920, 25, 53), dtype=float64, chunksize=(10, 25, 53), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
            "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
            "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " - ], - "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Attributes:\n", + " precision: 2\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", + " var_desc: Air temperature\n", " dataset: NMC Reanalysis\n", " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.air" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "numpy.ndarray" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(ds.air.as_numpy().data)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "615efd76-2194-4604-9ab8-61499e7d725d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cupy.ndarray" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(ds.air.data)" - ] - }, - { + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]
    " + ], + "text/plain": [ + " Size: 31MB\n", + "dask.array\n", + "Coordinates:\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " var_desc: Air temperature\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " statistic: Individual Obs\n", + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.chunk(time=10).air" + ] + }, + { + "cell_type": "markdown", + "id": "3f4c72f6-22e7-4e99-9f4e-2524d6ab4226", + "metadata": {}, + "source": [ + "`dask.array.core.getter` calls `np.asarray` on each chunk.\n", + "\n", + "This calls `ImplicitToExplicitIndexingAdapter.__array__` which calls `np.asarray(cupy.array)` which raises.\n", + "\n", + "Xarray uses `.get_duck_array` internally to remove these adapters. 
We might need to add\n", + "```python\n", + "# handle xarray internal classes that might wrap cupy\n", + "if hasattr(c, \"get_duck_array\"):\n", + " c = c.get_duck_array()\n", + "else:\n", + " c = np.asarray(c)\n", + "```" + ] + }, + { "cell_type": "code", - "execution_count": 16, - "id": "140fe3e2-ea9b-445d-8401-5c624384c182", + "execution_count": 19, + "id": "e1256d03-9701-433a-8291-80dc8dccffce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy.ndarray" + "False" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "type(ds.air.mean(\"time\").load().data)" + "from dask.utils import is_arraylike\n", + "\n", + "data = ds.air.variable._data\n", + "is_arraylike(data)" ] }, { - "cell_type": "markdown", - "id": "cab539a7-d952-4b38-b515-712c52c62501", - "metadata": { - "tags": [] - }, + "cell_type": "code", + "execution_count": 20, + "id": "308affa5-9fb9-4638-989b-97aac2604c16", + "metadata": {}, + "outputs": [], "source": [ - "## Doesn't work: Chunk with dask" + "from xarray.core.indexing import ImplicitToExplicitIndexingAdapter" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "985cd2f8-406e-4e9e-8017-42efb16aa40e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[241.2 , 242.5 , 243.5 , ..., 232.8 , 235.5 , 238.6 ],\n", + " [243.8 , 244.5 , 244.7 , ..., 232.8 , 235.3 , 239.3 ],\n", + " [250. , 249.8 , 248.89, ..., 233.2 , 236.39, 241.7 ],\n", + " ...,\n", + " [296.6 , 296.2 , 296.4 , ..., 295.4 , 295.1 , 294.7 ],\n", + " [295.9 , 296.2 , 296.79, ..., 295.9 , 295.9 , 295.2 ],\n", + " [296.29, 296.79, 297.1 , ..., 296.9 , 296.79, 296.6 ]],\n", + "\n", + " [[242.1 , 242.7 , 243.1 , ..., 232. , 233.6 , 235.8 ],\n", + " [243.6 , 244.1 , 244.2 , ..., 231. 
, 232.5 , 235.7 ],\n", + " [253.2 , 252.89, 252.1 , ..., 230.8 , 233.39, 238.5 ],\n", + " ...,\n", + " [296.4 , 295.9 , 296.2 , ..., 295.4 , 295.1 , 294.79],\n", + " [296.2 , 296.7 , 296.79, ..., 295.6 , 295.5 , 295.1 ],\n", + " [296.29, 297.2 , 297.4 , ..., 296.4 , 296.4 , 296.6 ]],\n", + "\n", + " [[242.3 , 242.2 , 242.3 , ..., 234.3 , 236.1 , 238.7 ],\n", + " [244.6 , 244.39, 244. , ..., 230.3 , 232. , 235.7 ],\n", + " [256.2 , 255.5 , 254.2 , ..., 231.2 , 233.2 , 238.2 ],\n", + " ...,\n", + " [295.6 , 295.4 , 295.4 , ..., 296.29, 295.29, 295. ],\n", + " [296.2 , 296.5 , 296.29, ..., 296.4 , 296. , 295.6 ],\n", + " [296.4 , 296.29, 296.4 , ..., 297. , 297. , 296.79]],\n", + "\n", + " ...,\n", + "\n", + " [[243.49, 242.99, 242.09, ..., 244.19, 244.49, 244.89],\n", + " [249.09, 248.99, 248.59, ..., 240.59, 241.29, 242.69],\n", + " [262.69, 262.19, 261.69, ..., 239.39, 241.69, 245.19],\n", + " ...,\n", + " [294.79, 295.29, 297.49, ..., 295.49, 295.39, 294.69],\n", + " [296.79, 297.89, 298.29, ..., 295.49, 295.49, 294.79],\n", + " [298.19, 299.19, 298.79, ..., 296.09, 295.79, 295.79]],\n", + "\n", + " [[245.79, 244.79, 243.49, ..., 243.29, 243.99, 244.79],\n", + " [249.89, 249.29, 248.49, ..., 241.29, 242.49, 244.29],\n", + " [262.39, 261.79, 261.29, ..., 240.49, 243.09, 246.89],\n", + " ...,\n", + " [293.69, 293.89, 295.39, ..., 295.09, 294.69, 294.29],\n", + " [296.29, 297.19, 297.59, ..., 295.29, 295.09, 294.39],\n", + " [297.79, 298.39, 298.49, ..., 295.69, 295.49, 295.19]],\n", + "\n", + " [[245.09, 244.29, 243.29, ..., 241.69, 241.49, 241.79],\n", + " [249.89, 249.29, 248.39, ..., 239.59, 240.29, 241.69],\n", + " [262.99, 262.19, 261.39, ..., 239.89, 242.59, 246.29],\n", + " ...,\n", + " [293.79, 293.69, 295.09, ..., 295.29, 295.09, 294.69],\n", + " [296.09, 296.89, 297.19, ..., 295.69, 295.69, 295.19],\n", + " [297.69, 298.09, 298.09, ..., 296.49, 296.19, 295.69]]],\n", + " shape=(2920, 25, 53))" + ] + }, + "execution_count": 21, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "ImplicitToExplicitIndexingAdapter(data).get_duck_array()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fa8ef4f7-5014-476f-b4c0-ec2f9abdb6e2", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.", + "output_type": "error", + "traceback": [ + "\u001b[31mTypeError\u001b[39m\u001b[31m:\u001b[39m Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.\n" + ] + } + ], + "source": [ + "ds.chunk(time=10).air.compute()" ] }, { "cell_type": "markdown", - "id": "62c084eb-8df4-4b7f-a187-a736d68d430d", + "id": "17dc1bf6-7548-4eee-a5f3-ebcc20d41567", "metadata": {}, "source": [ - "`meta` is wrong" + "### explicit meta" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", + "execution_count": 23, + "id": "cdd4b4e6-d69a-4898-964a-0e6096ca1942", "metadata": {}, "outputs": [ { @@ -4345,13 +4739,14 @@ " --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", "}\n", "\n", - "html[theme=dark],\n", - "body[data-theme=dark],\n", + "html[theme=\"dark\"],\n", + "html[data-theme=\"dark\"],\n", + "body[data-theme=\"dark\"],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", " --xr-font-color2: rgba(255, 255, 255, 0.54);\n", " --xr-font-color3: rgba(255, 255, 255, 0.38);\n", - " --xr-border-color: #1F1F1F;\n", + " --xr-border-color: #1f1f1f;\n", " --xr-disabled-color: #515151;\n", " --xr-background-color: #111111;\n", " --xr-background-color-row-even: #111111;\n", @@ -4396,7 +4791,7 @@ ".xr-sections {\n", " padding-left: 0 !important;\n", " display: grid;\n", - " grid-template-columns: 150px auto auto 1fr 20px 20px;\n", + " grid-template-columns: 150px auto auto 1fr 0 20px 0 20px;\n", "}\n", "\n", ".xr-section-item {\n", @@ -4404,7 +4799,9 
@@ "}\n", "\n", ".xr-section-item input {\n", - " display: none;\n", + " display: inline-block;\n", + " opacity: 0;\n", + " height: 0;\n", "}\n", "\n", ".xr-section-item input + label {\n", @@ -4416,6 +4813,10 @@ " color: var(--xr-font-color2);\n", "}\n", "\n", + ".xr-section-item input:focus + label {\n", + " border: 2px solid var(--xr-font-color0);\n", + "}\n", + "\n", ".xr-section-item input:enabled + label:hover {\n", " color: var(--xr-font-color0);\n", "}\n", @@ -4437,7 +4838,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: '►';\n", + " content: \"►\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -4448,7 +4849,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: '▼';\n", + " content: \"▼\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -4520,15 +4921,15 @@ "}\n", "\n", ".xr-dim-list:before {\n", - " content: '(';\n", + " content: \"(\";\n", "}\n", "\n", ".xr-dim-list:after {\n", - " content: ')';\n", + " content: \")\";\n", "}\n", "\n", ".xr-dim-list li:not(:last-child):after {\n", - " content: ',';\n", + " content: \",\";\n", " padding-right: 5px;\n", "}\n", "\n", @@ -4678,24 +5079,24 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "dask.array<xarray-air, shape=(2920, 25, 53), dtype=float32, chunksize=(10, 25, 53), chunktype=numpy.ndarray>\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)> Size: 31MB\n",
    +       "dask.array<xarray-air, shape=(2920, 25, 53), dtype=float64, chunksize=(10, 25, 53), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
    +       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
            "Attributes:\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    units:         degK\n",
    +       "    precision:     2\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    var_desc:      Air temperature\n",
            "    dataset:       NMC Reanalysis\n",
            "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
            "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + " dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • lon
    PandasIndex
    PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
    +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
    +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
    +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
    +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
    +       "       325.0, 327.5, 330.0],\n",
    +       "      dtype='float32', name='lon'))
  • long_name :
    4xDaily Air temperature at sigma level 995
    units :
    degK
    precision :
    2
    GRIB_id :
    11
    GRIB_name :
    TMP
    var_desc :
    Air temperature
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    statistic :
    Individual Obs
    parent_stat :
    Other
    actual_range :
    [185.16000366210938, 322.1000061035156]
  • " ], "text/plain": [ - "\n", - "dask.array\n", + " Size: 31MB\n", + "dask.array\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", "Attributes:\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " units: degK\n", + " precision: 2\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", + " var_desc: Air temperature\n", " dataset: NMC Reanalysis\n", " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.chunk(time=10).air" - ] - }, - { - "cell_type": "markdown", - "id": "3f4c72f6-22e7-4e99-9f4e-2524d6ab4226", - "metadata": {}, - "source": [ - "`dask.array.core.getter` calls `np.asarray` on each chunk.\n", - "\n", - "This calls `ImplicitToExplicitIndexingAdapter.__array__` which calls `np.asarray(cupy.array)` which raises.\n", - "\n", - "Xarray uses `.get_duck_array` internally to remove these adapters. 
We might need to add\n", - "```python\n", - "# handle xarray internal classes that might wrap cupy\n", - "if hasattr(c, \"get_duck_array\"):\n", - " c = c.get_duck_array()\n", - "else:\n", - " c = np.asarray(c)\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "e1256d03-9701-433a-8291-80dc8dccffce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" + " parent_stat: Other\n", + " actual_range: [185.16000366210938, 322.1000061035156]" ] }, - "execution_count": 16, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "from dask.utils import is_arraylike\n", - "\n", - "data = ds.air.variable._data\n", - "is_arraylike(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "308affa5-9fb9-4638-989b-97aac2604c16", - "metadata": {}, - "outputs": [], - "source": [ - "from xarray.core.indexing import ImplicitToExplicitIndexingAdapter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "985cd2f8-406e-4e9e-8017-42efb16aa40e", - "metadata": {}, - "outputs": [], - "source": [ - "ImplicitToExplicitIndexingAdapter(data).get_duck_array()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa8ef4f7-5014-476f-b4c0-ec2f9abdb6e2", - "metadata": {}, - "outputs": [], - "source": [ - "ds.chunk(time=10).air.compute()" - ] - }, - { - "cell_type": "markdown", - "id": "17dc1bf6-7548-4eee-a5f3-ebcc20d41567", - "metadata": {}, - "source": [ - "### explicit meta" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdd4b4e6-d69a-4898-964a-0e6096ca1942", - "metadata": {}, - "outputs": [], "source": [ "import cupy as cp\n", "\n", @@ -4967,45 +5277,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "74f80d94-ebb6-43c3-9411-79e0442d894e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "Implicit conversion to a NumPy array is not allowed. 
Please use `.get()` to construct a NumPy array explicitly.", + "output_type": "error", + "traceback": [ + "\u001b[31mTypeError\u001b[39m\u001b[31m:\u001b[39m Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.\n" + ] + } + ], "source": [ - "%autoreload\n", - "\n", "chunked.compute()" ] }, - { - "cell_type": "markdown", - "id": "1c07c449-bc43-490a-ac38-11e93200133d", - "metadata": {}, - "source": [ - "## GroupBy with flox\n", - "\n", - "Requires\n", - "\n", - "1. flox main branch?\n", - "2. https://github.com/ml31415/numpy-groupies/pull/63" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "id": "ac543634-80be-4e44-83e8-9e95a4955030", "metadata": {}, "outputs": [], - "source": [ - "ds.air.groupby(\"time.month\").mean()" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "miniconda3-kvikio_env", + "display_name": "cupy-xarray-doc", "language": "python", - "name": "conda-env-miniconda3-kvikio_env-py" + "name": "cupy-xarray-doc" }, "language_info": { "codemirror_mode": { @@ -5017,7 +5319,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.11" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From 7fa7c06f5d7d504d7d798bc722eac6e002551500 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Mar 2025 11:41:35 +1300 Subject: [PATCH 28/30] Bump xarray from 2025.1.3.dev22+g0184702f to 2025.03.0 Bumps [xarray](https://github.com/pydata/xarray) from 2025.1.3.dev22+g0184702f to 2025.03.0. 
- [Release notes](https://github.com/pydata/xarray/releases) - [Changelog](https://github.com/pydata/xarray/blob/main/HOW_TO_RELEASE.md) - [Commits](https://github.com/pydata/xarray/compare/0184702f16c3f744fc9096c7dac690626dcc6922...v2025.03.0) Use stable version of xarray, also with patch https://github.com/pydata/xarray/pull/10081 --- ci/doc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/doc.yml b/ci/doc.yml index 0e068d8..d84f6af 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -15,8 +15,8 @@ dependencies: - ipywidgets - furo>=2024.8.6 - myst-nb + - xarray>=2025.03.0 - zarr>=3.0.3 - pip: - - xarray @ git+https://github.com/pydata/xarray.git@0184702f16c3f744fc9096c7dac690626dcc6922 #https://github.com/pydata/xarray/pull/10078 # relative to this file. Needs to be editable to be accepted. - --editable .. From 1e205ec3649277e3072791cd0b02ff9d2c1e0659 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 1 Apr 2025 23:33:19 +1300 Subject: [PATCH 29/30] Try overriding default prototype to be GPU buffer Using functools.partial to override default buffer protocol to be GPU buffer instead of CPU buffer. Not quite working as expected, but hopefully gets a point across. --- cupy_xarray/kvikio.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 2ee9331..7e3180d 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -3,6 +3,8 @@ arrays in GPU memory. 
""" +import functools + from xarray.backends.common import _normalize_path # TODO: can this be public from xarray.backends.store import StoreBackendEntrypoint from xarray.backends.zarr import ZarrBackendEntrypoint, ZarrStore @@ -11,6 +13,7 @@ try: import kvikio.zarr + import zarr has_kvikio = True except ImportError: @@ -60,20 +63,31 @@ def open_dataset( ) -> Dataset: filename_or_obj = _normalize_path(filename_or_obj) if not store: - store = ZarrStore.open_group( - store=kvikio.zarr.GDSStore(root=filename_or_obj), - group=group, - mode=mode, - synchronizer=synchronizer, - consolidated=consolidated, - consolidate_on_close=False, - chunk_store=chunk_store, - storage_options=storage_options, - zarr_version=zarr_version, - use_zarr_fill_value_as_mask=None, - zarr_format=zarr_format, - cache_members=cache_members, - ) + with zarr.config.enable_gpu(): + _store = kvikio.zarr.GDSStore(root=filename_or_obj) + + # Override default buffer prototype to be GPU buffer + # buffer_prototype = zarr.core.buffer.core.default_buffer_prototype() + buffer_prototype = zarr.core.buffer.gpu.buffer_prototype + _store.get = functools.partial(_store.get, prototype=buffer_prototype) + _store.get_partial_values = functools.partial( + _store.get_partial_values, prototype=buffer_prototype + ) + + store = ZarrStore.open_group( + store=_store, + group=group, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=False, + chunk_store=chunk_store, + storage_options=storage_options, + zarr_version=zarr_version, + use_zarr_fill_value_as_mask=None, + zarr_format=zarr_format, + cache_members=cache_members, + ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): From b45decb4f298266cb3aa7e14711cbb21a63edb37 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Apr 2025 13:43:28 +1200 Subject: [PATCH 30/30] Update to stable version of kvikio=25.04.00 Stable release at 
https://github.com/rapidsai/kvikio/releases/tag/v25.04.00 --- ci/doc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/doc.yml b/ci/doc.yml index d84f6af..867cb57 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - cupy-core - - rapidsai-nightly::kvikio>=25.04.00a + - rapidsai::kvikio>=25.04.00 - pip - python=3.11 - sphinx