|
| 1 | +diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py |
| 2 | +index 601acac20..7c89cab6b 100644 |
| 3 | +--- a/pandas/core/groupby.py |
| 4 | ++++ b/pandas/core/groupby.py |
| 5 | +@@ -44,7 +44,7 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, |
| 6 | + DataError, SpecificationError) |
| 7 | + from pandas.core.index import (Index, MultiIndex, |
| 8 | + CategoricalIndex, _ensure_index) |
| 9 | +-from pandas.core.arrays import Categorical |
| 10 | ++from pandas.core.arrays import ExtensionArray, Categorical |
| 11 | + from pandas.core.frame import DataFrame |
| 12 | + from pandas.core.generic import NDFrame, _shared_docs |
| 13 | + from pandas.core.internals import BlockManager, make_block |
| 14 | +@@ -2968,7 +2968,7 @@ class Grouping(object): |
| 15 | + |
| 16 | + # no level passed |
| 17 | + elif not isinstance(self.grouper, |
| 18 | +- (Series, Index, Categorical, np.ndarray)): |
| 19 | ++ (Series, Index, ExtensionArray, np.ndarray)): |
| 20 | + if getattr(self.grouper, 'ndim', 1) != 1: |
| 21 | + t = self.name or str(type(self.grouper)) |
| 22 | + raise ValueError("Grouper for '%s' not 1-dimensional" % t) |
| 23 | +diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py |
| 24 | +index 27c106efd..f8078d279 100644 |
| 25 | +--- a/pandas/tests/extension/base/__init__.py |
| 26 | ++++ b/pandas/tests/extension/base/__init__.py |
| 27 | +@@ -44,6 +44,7 @@ from .casting import BaseCastingTests # noqa |
| 28 | + from .constructors import BaseConstructorsTests # noqa |
| 29 | + from .dtype import BaseDtypeTests # noqa |
| 30 | + from .getitem import BaseGetitemTests # noqa |
| 31 | ++from .groupby import BaseGroupbyTests # noqa |
| 32 | + from .interface import BaseInterfaceTests # noqa |
| 33 | + from .methods import BaseMethodsTests # noqa |
| 34 | + from .missing import BaseMissingTests # noqa |
| 35 | +diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py |
| 36 | +new file mode 100644 |
| 37 | +index 000000000..a29ef2a50 |
| 38 | +--- /dev/null |
| 39 | ++++ b/pandas/tests/extension/base/groupby.py |
| 40 | +@@ -0,0 +1,69 @@ |
| 41 | ++import pytest |
| 42 | ++ |
| 43 | ++import pandas.util.testing as tm |
| 44 | ++import pandas as pd |
| 45 | ++from .base import BaseExtensionTests |
| 46 | ++ |
| 47 | ++ |
| 48 | ++class BaseGroupbyTests(BaseExtensionTests): |
| 49 | ++ """Groupby-specific tests.""" |
| 50 | ++ |
| 51 | ++ def test_grouping_grouper(self, data_for_grouping): |
| 52 | ++ df = pd.DataFrame({ |
| 53 | ++ "A": ["B", "B", None, None, "A", "A", "B", "C"], |
| 54 | ++ "B": data_for_grouping |
| 55 | ++ }) |
| 56 | ++ gr1 = df.groupby("A").grouper.groupings[0] |
| 57 | ++ gr2 = df.groupby("B").grouper.groupings[0] |
| 58 | ++ |
| 59 | ++ tm.assert_numpy_array_equal(gr1.grouper, df.A.values) |
| 60 | ++ tm.assert_extension_array_equal(gr2.grouper, data_for_grouping) |
| 61 | ++ |
| 62 | ++ @pytest.mark.parametrize('as_index', [True, False]) |
| 63 | ++ def test_groupby_extension_agg(self, as_index, data_for_grouping): |
| 64 | ++ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], |
| 65 | ++ "B": data_for_grouping}) |
| 66 | ++ result = df.groupby("B", as_index=as_index).A.mean() |
| 67 | ++ _, index = pd.factorize(data_for_grouping, sort=True) |
| 68 | ++ # TODO(ExtensionIndex): remove astype |
| 69 | ++ index = pd.Index(index.astype(object), name="B") |
| 70 | ++ expected = pd.Series([3, 1, 4], index=index, name="A") |
| 71 | ++ if as_index: |
| 72 | ++ self.assert_series_equal(result, expected) |
| 73 | ++ else: |
| 74 | ++ expected = expected.reset_index() |
| 75 | ++ self.assert_frame_equal(result, expected) |
| 76 | ++ |
| 77 | ++ def test_groupby_extension_no_sort(self, data_for_grouping): |
| 78 | ++ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], |
| 79 | ++ "B": data_for_grouping}) |
| 80 | ++ result = df.groupby("B", sort=False).A.mean() |
| 81 | ++ _, index = pd.factorize(data_for_grouping, sort=False) |
| 82 | ++ # TODO(ExtensionIndex): remove astype |
| 83 | ++ index = pd.Index(index.astype(object), name="B") |
| 84 | ++ expected = pd.Series([1, 3, 4], index=index, name="A") |
| 85 | ++ self.assert_series_equal(result, expected) |
| 86 | ++ |
| 87 | ++ def test_groupby_extension_transform(self, data_for_grouping): |
| 88 | ++ valid = data_for_grouping[~data_for_grouping.isna()] |
| 89 | ++ df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], |
| 90 | ++ "B": valid}) |
| 91 | ++ |
| 92 | ++ result = df.groupby("B").A.transform(len) |
| 93 | ++ expected = pd.Series([3, 3, 2, 2, 3, 1], name="A") |
| 94 | ++ |
| 95 | ++ self.assert_series_equal(result, expected) |
| 96 | ++ |
| 97 | ++ @pytest.mark.parametrize('op', [ |
| 98 | ++ lambda x: 1, |
| 99 | ++ lambda x: [1] * len(x), |
| 100 | ++ lambda x: pd.Series([1] * len(x)), |
| 101 | ++ lambda x: x, |
| 102 | ++ ], ids=['scalar', 'list', 'series', 'object']) |
| 103 | ++ def test_groupby_extension_apply(self, data_for_grouping, op): |
| 104 | ++ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], |
| 105 | ++ "B": data_for_grouping}) |
| 106 | ++ df.groupby("B").apply(op) |
| 107 | ++ df.groupby("B").A.apply(op) |
| 108 | ++ df.groupby("A").apply(op) |
| 109 | ++ df.groupby("A").B.apply(op) |
| 110 | +diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py |
| 111 | +index 22c1a67a0..d50917056 100644 |
| 112 | +--- a/pandas/tests/extension/decimal/test_decimal.py |
| 113 | ++++ b/pandas/tests/extension/decimal/test_decimal.py |
| 114 | +@@ -127,6 +127,10 @@ class TestCasting(BaseDecimal, base.BaseCastingTests): |
| 115 | + pass |
| 116 | + |
| 117 | + |
| 118 | ++class TestGroupby(BaseDecimal, base.BaseGroupbyTests): |
| 119 | ++ pass |
| 120 | ++ |
| 121 | ++ |
| 122 | + def test_series_constructor_coerce_data_to_extension_dtype_raises(): |
| 123 | + xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the " |
| 124 | + "extension array directly.") |
| 125 | +diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py |
| 126 | +index 51a68a370..d9ae49d87 100644 |
| 127 | +--- a/pandas/tests/extension/json/array.py |
| 128 | ++++ b/pandas/tests/extension/json/array.py |
| 129 | +@@ -113,8 +113,8 @@ class JSONArray(ExtensionArray): |
| 130 | + return cls(data) |
| 131 | + |
| 132 | + def _values_for_factorize(self): |
| 133 | +- frozen = tuple(tuple(x.items()) for x in self) |
| 134 | +- return np.array(frozen, dtype=object), () |
| 135 | ++ frozen = self._values_for_argsort() |
| 136 | ++ return frozen, () |
| 137 | + |
| 138 | + def _values_for_argsort(self): |
| 139 | + # Disable NumPy's shape inference by including an empty tuple... |
| 140 | +diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py |
| 141 | +index 63d97d5e7..5e9639c48 100644 |
| 142 | +--- a/pandas/tests/extension/json/test_json.py |
| 143 | ++++ b/pandas/tests/extension/json/test_json.py |
| 144 | +@@ -89,11 +89,12 @@ class TestMissing(base.BaseMissingTests): |
| 145 | + """We treat dictionaries as a mapping in fillna, not a scalar.""" |
| 146 | + |
| 147 | + |
| 148 | +-class TestMethods(base.BaseMethodsTests): |
| 149 | +- unhashable = pytest.mark.skip(reason="Unhashable") |
| 150 | +- unstable = pytest.mark.skipif(not PY36, # 3.6 or higher |
| 151 | +- reason="Dictionary order unstable") |
| 152 | ++unhashable = pytest.mark.skip(reason="Unhashable") |
| 153 | ++unstable = pytest.mark.skipif(not PY36, # 3.6 or higher |
| 154 | ++ reason="Dictionary order unstable") |
| 155 | ++ |
| 156 | + |
| 157 | ++class TestMethods(base.BaseMethodsTests): |
| 158 | + @unhashable |
| 159 | + def test_value_counts(self, all_data, dropna): |
| 160 | + pass |
| 161 | +@@ -118,6 +119,7 @@ class TestMethods(base.BaseMethodsTests): |
| 162 | + super(TestMethods, self).test_sort_values( |
| 163 | + data_for_sorting, ascending) |
| 164 | + |
| 165 | ++ @unstable |
| 166 | + @pytest.mark.parametrize('ascending', [True, False]) |
| 167 | + def test_sort_values_missing(self, data_missing_for_sorting, ascending): |
| 168 | + super(TestMethods, self).test_sort_values_missing( |
| 169 | +@@ -126,3 +128,34 @@ class TestMethods(base.BaseMethodsTests): |
| 170 | + |
| 171 | + class TestCasting(base.BaseCastingTests): |
| 172 | + pass |
| 173 | ++ |
| 174 | ++ |
| 175 | ++class TestGroupby(base.BaseGroupbyTests): |
| 176 | ++ |
| 177 | ++ @unhashable |
| 178 | ++ def test_groupby_extension_transform(self): |
| 179 | ++ """ |
| 180 | ++ This currently fails in Series.name.setter, since the |
| 181 | ++ name must be hashable, but the value is a dictionary. |
| 182 | ++ I think this is what we want, i.e. `.name` should be the original |
| 183 | ++ values, and not the values for factorization. |
| 184 | ++ """ |
| 185 | ++ |
| 186 | ++ @unhashable |
| 187 | ++ def test_groupby_extension_apply(self): |
| 188 | ++ """ |
| 189 | ++ This fails in Index._do_unique_check with |
| 190 | ++ |
| 191 | ++ > hash(val) |
| 192 | + E TypeError: unhashable type: 'UserDict' |
| 193 | ++ |
| 194 | ++ I suspect that once we support Index[ExtensionArray], |
| 195 | ++ we'll be able to dispatch unique. |
| 196 | ++ """ |
| 197 | ++ |
| 198 | ++ @unstable |
| 199 | ++ @pytest.mark.parametrize('as_index', [True, False]) |
| 200 | ++ def test_groupby_extension_agg(self, as_index, data_for_grouping): |
| 201 | ++ super(TestGroupby, self).test_groupby_extension_agg( |
| 202 | ++ as_index, data_for_grouping |
| 203 | ++ ) |
0 commit comments