From ef922f2f23347bf0241d3312043f756b86926582 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Nov 2019 12:26:44 -0800 Subject: [PATCH 1/6] Added failing test --- pandas/tests/groupby/test_groupby.py | 29 +++++++++------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b848e9caad9be..5f454f7aefae4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1734,34 +1734,23 @@ def test_empty_dataframe_groupby(): tm.assert_frame_equal(result, expected) -def test_tuple_warns(): +def test_tuple_as_grouping(): # https://github.com/pandas-dev/pandas/issues/18314 df = pd.DataFrame( { - ("a", "b"): [1, 1, 2, 2], - "a": [1, 1, 1, 2], - "b": [1, 2, 2, 2], + ("a", "b"): [1, 1, 1, 1], + "a": [2, 2, 2, 2], + "b": [2, 2, 2, 2], "c": [1, 1, 1, 1], } ) - with tm.assert_produces_warning(FutureWarning) as w: - df[["a", "b", "c"]].groupby(("a", "b")).c.mean() - assert "Interpreting tuple 'by' as a list" in str(w[0].message) + with pytest.raises(KeyError): + df[["a", "b", "c"]].groupby(("a", "b")) - with tm.assert_produces_warning(None): - df.groupby(("a", "b")).c.mean() - - -def test_tuple_warns_unhashable(): - # https://github.com/pandas-dev/pandas/issues/18314 - business_dates = date_range(start="4/1/2014", end="6/30/2014", freq="B") - df = DataFrame(1, index=business_dates, columns=["a", "b"]) - - with tm.assert_produces_warning(FutureWarning) as w: - df.groupby((df.index.year, df.index.month)).nth([0, 3, -1]) - - assert "Interpreting tuple 'by' as a list" in str(w[0].message) + result = df.groupby(("a", "b"))["c"].sum() + expected = pd.Series([4], name="c", index=pd.Index([1], name=("a", "b"))) + tm.assert_series_equal(result, expected) def test_tuple_correct_keyerror(): From d078ad5e9fa6b3940d3968ec0be1901ac5614471 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Nov 2019 12:29:45 -0800 Subject: [PATCH 2/6] Removed code for special casing tuples --- pandas/core/groupby/grouper.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c37617b1f1f7f..f8373b98e102b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -525,28 +525,6 @@ def get_grouper( elif isinstance(key, ops.BaseGrouper): return key, [], obj - # In the future, a tuple key will always mean an actual key, - # not an iterable of keys. In the meantime, we attempt to provide - # a warning. We can assume that the user wanted a list of keys when - # the key is not in the index. We just have to be careful with - # unhashable elements of `key`. Any unhashable elements implies that - # they wanted a list of keys. - # https://github.com/pandas-dev/pandas/issues/18314 - if isinstance(key, tuple): - all_hashable = is_hashable(key) - if ( - all_hashable and key not in obj and set(key).issubset(obj) - ) or not all_hashable: - # column names ('a', 'b') -> ['a', 'b'] - # arrays like (a, b) -> [a, b] - msg = ( - "Interpreting tuple 'by' as a list of keys, rather than " - "a single key. Use 'by=[...]' instead of 'by=(...)'. In " - "the future, a tuple will always mean a single key." - ) - warnings.warn(msg, FutureWarning, stacklevel=5) - key = list(key) - if not isinstance(key, list): keys = [key] match_axis_length = False From 8ed6ff829c20c0a0e2a0d9b3a9a21db245299d83 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Nov 2019 12:36:15 -0800 Subject: [PATCH 3/6] docs --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7ac1830af568c..f68fc97317e4a 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -313,6 +313,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) +- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) - Removed :meth:`Series.from_array` (:issue:`18258`) - Removed :meth:`DataFrame.from_items` (:issue:`18458`) - Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`) From 347fb478fb691e2011659d5c01e828d26d2abc65 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Nov 2019 12:52:41 -0800 Subject: [PATCH 4/6] Annotations --- pandas/core/groupby/groupby.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f7282950498c5..c352e139e3e30 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -14,8 +14,10 @@ class providing the base-class of operations. import re import types from typing import ( + Callable, Dict, FrozenSet, + Hashable, Iterable, List, Mapping, @@ -343,6 +345,17 @@ def _group_selection_context(groupby): groupby._reset_group_selection() +KeysArgType = Optional[ + Union[ + Hashable, + List[Hashable], + Callable[[Hashable], Hashable], + List[Callable[[Hashable], Hashable]], + Mapping[Hashable, Hashable], + ] +] + + class _GroupBy(PandasObject, SelectionMixin): _group_selection = None _apply_whitelist = frozenset() # type: FrozenSet[str] @@ -350,7 +363,7 @@ class _GroupBy(PandasObject, SelectionMixin): def __init__( self, obj: NDFrame, - keys=None, + keys: KeysArgType = None, axis: int = 0, level=None, grouper: "Optional[ops.BaseGrouper]" = None, @@ -2506,7 +2519,7 @@ def _reindex_output( @Appender(GroupBy.__doc__) def get_groupby( obj: NDFrame, - by=None, + by: KeysArgType = None, axis: int = 0, level=None, grouper: "Optional[ops.BaseGrouper]" = None, From 1337aefa539de68a266dc27d40eacd79d3fdff05 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Nov 2019 20:58:07 -0800 Subject: [PATCH 5/6] lint fixup --- pandas/core/groupby/grouper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index f8373b98e102b..7915a65f09e81 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -4,7 +4,6 @@ """ from typing import Hashable, List, Optional, Tuple -import warnings import numpy as np @@ -14,7 +13,6 @@ ensure_categorical, is_categorical_dtype, is_datetime64_dtype, - is_hashable, is_list_like, is_scalar, is_timedelta64_dtype, From ea9011e5cba423c1400fb4286f37d8853110cb16 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 21 Nov 2019 09:11:00 -0800 Subject: [PATCH 6/6] alias changes --- pandas/core/groupby/groupby.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bc30feacef070..232315660da8d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -345,14 +345,12 @@ def _group_selection_context(groupby): groupby._reset_group_selection() -KeysArgType = Optional[ - Union[ - Hashable, - List[Hashable], - Callable[[Hashable], Hashable], - List[Callable[[Hashable], Hashable]], - Mapping[Hashable, Hashable], - ] +_KeysArgType = Union[ + Hashable, + List[Hashable], + Callable[[Hashable], Hashable], + List[Callable[[Hashable], Hashable]], + Mapping[Hashable, Hashable], ] @@ -363,7 +361,7 @@ class _GroupBy(PandasObject, SelectionMixin): def __init__( self, obj: NDFrame, - keys: KeysArgType = None, + keys: Optional[_KeysArgType] = None, axis: int = 0, level=None, grouper: "Optional[ops.BaseGrouper]" = None, @@ -2517,7 +2515,7 @@ def _reindex_output( @Appender(GroupBy.__doc__) def get_groupby( obj: NDFrame, - by: KeysArgType = None, + by: Optional[_KeysArgType] = None, axis: int = 0, level=None, grouper: "Optional[ops.BaseGrouper]" = None,