diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 1900c208532..2281d939160 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,30 +9,12 @@ .. autosummary:: :toctree: generated/ - Coordinates.from_pandas_multiindex - Coordinates.get - Coordinates.items - Coordinates.keys - Coordinates.values - Coordinates.dims - Coordinates.dtypes - Coordinates.variables - Coordinates.xindexes - Coordinates.indexes - Coordinates.to_dataset - Coordinates.to_index - Coordinates.update - Coordinates.assign - Coordinates.merge - Coordinates.copy - Coordinates.equals - Coordinates.identical - core.coordinates.DatasetCoordinates.get core.coordinates.DatasetCoordinates.items core.coordinates.DatasetCoordinates.keys core.coordinates.DatasetCoordinates.values core.coordinates.DatasetCoordinates.dims + core.coordinates.DatasetCoordinates.sizes core.coordinates.DatasetCoordinates.dtypes core.coordinates.DatasetCoordinates.variables core.coordinates.DatasetCoordinates.xindexes @@ -74,6 +56,7 @@ core.coordinates.DataArrayCoordinates.keys core.coordinates.DataArrayCoordinates.values core.coordinates.DataArrayCoordinates.dims + core.coordinates.DataArrayCoordinates.sizes core.coordinates.DataArrayCoordinates.dtypes core.coordinates.DataArrayCoordinates.variables core.coordinates.DataArrayCoordinates.xindexes @@ -104,6 +87,25 @@ core.weighted.DataArrayWeighted.obj core.weighted.DataArrayWeighted.weights + core.coordinates.DataTreeCoordinates.get + core.coordinates.DataTreeCoordinates.items + core.coordinates.DataTreeCoordinates.keys + core.coordinates.DataTreeCoordinates.values + core.coordinates.DataTreeCoordinates.dims + core.coordinates.DataTreeCoordinates.sizes + core.coordinates.DataTreeCoordinates.dtypes + core.coordinates.DataTreeCoordinates.variables + core.coordinates.DataTreeCoordinates.xindexes + core.coordinates.DataTreeCoordinates.indexes + core.coordinates.DataTreeCoordinates.to_dataset + core.coordinates.DataTreeCoordinates.to_index + 
core.coordinates.DataTreeCoordinates.update + core.coordinates.DataTreeCoordinates.assign + core.coordinates.DataTreeCoordinates.merge + core.coordinates.DataTreeCoordinates.copy + core.coordinates.DataTreeCoordinates.equals + core.coordinates.DataTreeCoordinates.identical + core.accessor_dt.DatetimeAccessor.ceil core.accessor_dt.DatetimeAccessor.floor core.accessor_dt.DatetimeAccessor.round diff --git a/doc/api.rst b/doc/api.rst index 84b272e847d..3aa07830655 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -67,6 +67,7 @@ Attributes Dataset.attrs Dataset.encoding Dataset.indexes + Dataset.xindexes Dataset.chunks Dataset.chunksizes Dataset.nbytes @@ -275,6 +276,7 @@ Attributes DataArray.attrs DataArray.encoding DataArray.indexes + DataArray.xindexes DataArray.chunksizes ndarray attributes @@ -897,6 +899,84 @@ Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data .. DataTree.sortby .. DataTree.broadcast_like +Coordinates +=========== + +Creating coordinates +-------------------- + +.. autosummary:: + :toctree: generated/ + + Coordinates + Coordinates.from_xindex + Coordinates.from_pandas_multiindex + +Attributes +---------- + +.. autosummary:: + :toctree: generated/ + + Coordinates.dims + Coordinates.sizes + Coordinates.dtypes + Coordinates.variables + Coordinates.indexes + Coordinates.xindexes + +Dictionary Interface +-------------------- + +Coordinates implement the mapping interface with keys given by variable names +and values given by ``DataArray`` objects. + +.. autosummary:: + :toctree: generated/ + + Coordinates.__getitem__ + Coordinates.__setitem__ + Coordinates.__delitem__ + Coordinates.update + Coordinates.get + Coordinates.items + Coordinates.keys + Coordinates.values + +Coordinates contents +-------------------- + +.. autosummary:: + :toctree: generated/ + + Coordinates.to_dataset + Coordinates.to_index + Coordinates.assign + Coordinates.merge + Coordinates.copy + +Comparisons +----------- + +.. 
autosummary:: + :toctree: generated/ + + Coordinates.equals + Coordinates.identical + +Proxies +------- + +Coordinates that are accessed from the ``coords`` property of Dataset, DataArray +and DataTree objects, respectively. + +.. autosummary:: + :toctree: generated/ + + core.coordinates.DatasetCoordinates + core.coordinates.DataArrayCoordinates + core.coordinates.DataTreeCoordinates + Universal functions =================== @@ -1107,27 +1187,6 @@ Coder objects coders.CFDatetimeCoder -Coordinates objects -=================== - -Dataset -------- - -.. autosummary:: - :toctree: generated/ - - core.coordinates.DatasetCoordinates - core.coordinates.DatasetCoordinates.dtypes - -DataArray ---------- - -.. autosummary:: - :toctree: generated/ - - core.coordinates.DataArrayCoordinates - core.coordinates.DataArrayCoordinates.dtypes - Plotting ======== diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a3666d58ac..43edc5ee33e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,9 @@ v2025.02.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added :py:meth:`Coordinates.from_xindex` as a convenience for creating a new :py:class:`Coordinates` object + directly from an existing Xarray index object if the latter supports it (:pull:`10000`). + By `Benoit Bovy <https://github.com/benbovy>`_. - Allow kwargs in :py:meth:`DataTree.map_over_datasets` and :py:func:`map_over_datasets` (:issue:`10009`, :pull:`10012`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. - support python 3.13 (no free-threading) (:issue:`9664`, :pull:`9681`) @@ -52,7 +55,8 @@ Bug fixes Documentation ~~~~~~~~~~~~~ - +- Better expose the :py:class:`Coordinates` class in the API reference (:pull:`10000`). + By `Benoit Bovy <https://github.com/benbovy>`_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -131,6 +135,11 @@ New Features :py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark <https://github.com/spencerkclark>`_ and `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. 
+- :py:meth:`DatasetGroupBy.first` and :py:meth:`DatasetGroupBy.last` can now use ``flox`` if available. (:issue:`9647`) + By `Deepak Cherian <https://github.com/dcherian>`_. + +Breaking changes +~~~~~~~~~~~~~~~~ - Adds shards to the list of valid_encodings in the zarr backend, so that sharded Zarr V3s can be written (:issue:`9947`, :pull:`9948`). By `Jacob Prince_Bieker <https://github.com/jacobbieker>`_ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 4d59a7e94c1..a9ceeb08b96 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -200,10 +200,17 @@ class Coordinates(AbstractCoordinates): - returned via the :py:attr:`Dataset.coords`, :py:attr:`DataArray.coords`, and :py:attr:`DataTree.coords` properties, - - built from Pandas or other index objects - (e.g., :py:meth:`Coordinates.from_pandas_multiindex`), - - built directly from coordinate data and Xarray ``Index`` objects (beware that - no consistency check is done on those inputs), + - built from Xarray or Pandas index objects + (e.g., :py:meth:`Coordinates.from_xindex` or + :py:meth:`Coordinates.from_pandas_multiindex`), + - built manually from input coordinate data and Xarray ``Index`` objects via + :py:meth:`Coordinates.__init__` (beware that no consistency check is done + on those inputs). + + To create new coordinates from an existing Xarray ``Index`` object, use + :py:meth:`Coordinates.from_xindex` instead of + :py:meth:`Coordinates.__init__`. The latter is useful, e.g., for creating + coordinates with no default index. Parameters ---------- @@ -352,6 +359,35 @@ def _construct_direct( ) return obj + @classmethod + def from_xindex(cls, index: Index) -> Self: + """Create Xarray coordinates from an existing Xarray index. + + Parameters + ---------- + index : Index + Xarray index object. The index must support generating new + coordinate variables from itself. 
+ + Returns + ------- + coords : Coordinates + A collection of Xarray indexed coordinates created from the index. + + """ + variables = index.create_variables() + + if not variables: + raise ValueError( + "`Coordinates.from_xindex()` only supports index objects that can generate " + "new coordinate variables from scratch. The given index (shown below) did not " + f"create any coordinate.\n{index!r}" + ) + + indexes = {name: index for name in variables} + + return cls(coords=variables, indexes=indexes) + @classmethod def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: Hashable) -> Self: """Wrap a pandas multi-index as Xarray coordinates (dimension + levels). diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 91cb9754f9a..fe54a79c32a 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections.abc import Mapping + import numpy as np import pandas as pd import pytest @@ -8,7 +10,7 @@ from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.indexes import PandasIndex, PandasMultiIndex +from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex from xarray.core.variable import IndexVariable, Variable from xarray.tests import assert_identical, source_ndarray @@ -73,6 +75,27 @@ def test_init_dim_sizes_conflict(self) -> None: with pytest.raises(ValueError): Coordinates(coords={"foo": ("x", [1, 2]), "bar": ("x", [1, 2, 3, 4])}) + def test_from_xindex(self) -> None: + idx = PandasIndex([1, 2, 3], "x") + coords = Coordinates.from_xindex(idx) + + assert isinstance(coords.xindexes["x"], PandasIndex) + assert coords.xindexes["x"].equals(idx) + + expected = PandasIndex(idx, "x").create_variables() + assert list(coords.variables) == list(expected) + assert_identical(expected["x"], coords.variables["x"]) + + def test_from_xindex_error(self) -> 
None: + class CustomIndexNoCoordsGenerated(Index): + def create_variables(self, variables: Mapping | None = None): + return {} + + idx = CustomIndexNoCoordsGenerated() + + with pytest.raises(ValueError, match=".*index.*did not create any coordinate"): + Coordinates.from_xindex(idx) + def test_from_pandas_multiindex(self) -> None: midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) coords = Coordinates.from_pandas_multiindex(midx, "x")