Skip to content

Commit 9daf9b1

Browse files
authored
Internal refactor of label-based data selection (#5322)
* xindexes also returns multi-index levels as keys
* wip: move label selection into PandasIndex
  add Index.query() method
  split pd.Index vs. pd.MultiIndex logic
* Revert "xindexes also returns multi-index levels as keys"
  This reverts commit 261fb78.
  Let's keep this for later. There are too many places in Xarray that assume that xindexes keys are dimension names.
* fix broken tests
* remove old code + move/update tests
* remove duplicate function
* add PandasMultiIndex class + refactor query impl
* remove PandasIndex.from_variables for now
  Add it later in the refactoring when it will be needed elsewhere (e.g., in ``set_index``).
* fix broken tests
  Is this what we want?
* prevent loading values for xarray objs in slice
* update what's new
1 parent da0489f commit 9daf9b1

10 files changed

+395
-297
lines changed

doc/whats-new.rst

+7
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ Internal Changes
7272
(:pull:`5433`)
7373
By `Maximilian Roos <https://github.com/max-sixty>`_.
7474

75+
- Explicit indexes refactor: add an ``xarray.Index.query()`` method in which
76+
one may eventually provide a custom implementation of label-based data
77+
selection (not ready yet for public use). Also refactor the internal,
78+
pandas-specific implementation into ``PandasIndex.query()`` and
79+
``PandasMultiIndex.query()`` (:pull:`5322`).
80+
By `Benoit Bovy <https://github.com/benbovy>`_.
81+
7582
.. _whats-new.0.18.2:
7683

7784
v0.18.2 (19 May 2021)

xarray/core/alignment.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
import pandas as pd
1919

2020
from . import dtypes
21-
from .indexes import Index, PandasIndex
22-
from .indexing import get_indexer_nd
21+
from .indexes import Index, PandasIndex, get_indexer_nd, wrap_pandas_index
2322
from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index
2423
from .variable import IndexVariable, Variable
2524

@@ -561,7 +560,7 @@ def reindex_variables(
561560
"from that to be indexed along {:s}".format(str(indexer.dims), dim)
562561
)
563562

564-
target = new_indexes[dim] = PandasIndex(safe_cast_to_index(indexers[dim]))
563+
target = new_indexes[dim] = wrap_pandas_index(safe_cast_to_index(indexers[dim]))
565564

566565
if dim in indexes:
567566
# TODO (benbovy - flexible indexes): support other indexes than pd.Index?

xarray/core/dataarray.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,13 @@
5151
)
5252
from .dataset import Dataset, split_indexes
5353
from .formatting import format_item
54-
from .indexes import Index, Indexes, PandasIndex, default_indexes, propagate_indexes
54+
from .indexes import (
55+
Index,
56+
Indexes,
57+
default_indexes,
58+
propagate_indexes,
59+
wrap_pandas_index,
60+
)
5561
from .indexing import is_fancy_indexer
5662
from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords
5763
from .options import OPTIONS, _get_keep_attrs
@@ -1005,7 +1011,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray":
10051011
# TODO: benbovy: flexible indexes: support all xarray indexes (not just pandas.Index)
10061012
# xarray Index needs a copy method.
10071013
indexes = {
1008-
k: PandasIndex(v.to_pandas_index().copy(deep=deep))
1014+
k: wrap_pandas_index(v.to_pandas_index().copy(deep=deep))
10091015
for k, v in self._indexes.items()
10101016
}
10111017
return self._replace(variable, coords, indexes=indexes)

xarray/core/dataset.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@
6363
Index,
6464
Indexes,
6565
PandasIndex,
66+
PandasMultiIndex,
6667
default_indexes,
6768
isel_variable_and_index,
6869
propagate_indexes,
6970
remove_unused_levels_categories,
7071
roll_index,
72+
wrap_pandas_index,
7173
)
7274
from .indexing import is_fancy_indexer
7375
from .merge import (
@@ -3284,10 +3286,9 @@ def _rename_indexes(self, name_dict, dims_set):
32843286
continue
32853287
if isinstance(index, pd.MultiIndex):
32863288
new_names = [name_dict.get(k, k) for k in index.names]
3287-
new_index = index.rename(names=new_names)
3289+
indexes[new_name] = PandasMultiIndex(index.rename(names=new_names))
32883290
else:
3289-
new_index = index.rename(new_name)
3290-
indexes[new_name] = PandasIndex(new_index)
3291+
indexes[new_name] = PandasIndex(index.rename(new_name))
32913292
return indexes
32923293

32933294
def _rename_all(self, name_dict, dims_dict):
@@ -3516,7 +3517,7 @@ def swap_dims(
35163517
if new_index.nlevels == 1:
35173518
# make sure index name matches dimension name
35183519
new_index = new_index.rename(k)
3519-
indexes[k] = PandasIndex(new_index)
3520+
indexes[k] = wrap_pandas_index(new_index)
35203521
else:
35213522
var = v.to_base_variable()
35223523
var.dims = dims
@@ -3789,7 +3790,7 @@ def reorder_levels(
37893790
raise ValueError(f"coordinate {dim} has no MultiIndex")
37903791
new_index = index.reorder_levels(order)
37913792
variables[dim] = IndexVariable(coord.dims, new_index)
3792-
indexes[dim] = PandasIndex(new_index)
3793+
indexes[dim] = PandasMultiIndex(new_index)
37933794

37943795
return self._replace(variables, indexes=indexes)
37953796

@@ -3817,7 +3818,7 @@ def _stack_once(self, dims, new_dim):
38173818
coord_names = set(self._coord_names) - set(dims) | {new_dim}
38183819

38193820
indexes = {k: v for k, v in self.xindexes.items() if k not in dims}
3820-
indexes[new_dim] = PandasIndex(idx)
3821+
indexes[new_dim] = wrap_pandas_index(idx)
38213822

38223823
return self._replace_with_new_dims(
38233824
variables, coord_names=coord_names, indexes=indexes

0 commit comments

Comments
 (0)