Skip to content

API: Disallow dict as agg parameter during groupby #23393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,8 @@ Removal of prior version deprecations/changes
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
- Grouped, rolled, and resampled ``Series`` will now raise a ``ValueError`` when a dictionary is passed to ``agg``/``aggregate`` (:issue:`15931`)
- Grouped, rolled, and resampled ``DataFrame`` will now raise a ``ValueError`` when a nested dictionary is passed to ``agg``/``aggregate`` (:issue:`15931`)
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`)
- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`)
Expand Down
18 changes: 7 additions & 11 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,10 @@ def _aggregate(self, arg, *args, **kwargs):

obj = self._selected_obj

def nested_renaming_depr(level=4):
# deprecation of nested renaming
# GH 15931
warnings.warn(
("using a dict with renaming "
"is deprecated and will be removed in a future "
"version"),
FutureWarning, stacklevel=level)
def raise_on_dict_renaming():
# Originally deprecated in gh-15931, now enforcing.
rename_msg_err = "Using a dict with renaming is not allowed"
raise ValueError(rename_msg_err)

# if we have a dict of any non-scalars
# eg. {'A' : ['mean']}, normalize all to
Expand Down Expand Up @@ -403,10 +399,10 @@ def nested_renaming_depr(level=4):
msg = ('cannot perform renaming for {key} with a '
'nested dictionary').format(key=k)
raise SpecificationError(msg)
nested_renaming_depr(4 + (_level or 0))
raise_on_dict_renaming()

elif isinstance(obj, ABCSeries):
nested_renaming_depr()
raise_on_dict_renaming()
elif (isinstance(obj, ABCDataFrame) and
k not in obj.columns):
raise KeyError(
Expand All @@ -420,7 +416,7 @@ def nested_renaming_depr(level=4):
keys = list(compat.iterkeys(arg))
if (isinstance(obj, ABCDataFrame) and
len(obj.columns.intersection(keys)) != len(keys)):
nested_renaming_depr()
raise_on_dict_renaming()

from pandas.core.reshape.concat import concat

Expand Down
13 changes: 3 additions & 10 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import collections
import copy
import warnings
from functools import partial
from textwrap import dedent

Expand Down Expand Up @@ -785,15 +784,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
def _aggregate_multiple_funcs(self, arg, _level):
if isinstance(arg, dict):

# show the deprecation, but only if we
# have not shown a higher level one
# GH 15931
if isinstance(self._selected_obj, Series) and _level <= 1:
warnings.warn(
("using a dict on a Series for aggregation\n"
"is deprecated and will be removed in a future "
"version"),
FutureWarning, stacklevel=3)
# Deprecated in gh-15931, now enforcing.
if isinstance(self._selected_obj, Series):
raise ValueError("Using a dict with renaming is not allowed")

columns = list(arg.keys())
arg = list(arg.items())
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def int_frame_const_col():
return df


class TestDataFrameApply():
class TestDataFrameApply(object):

def test_apply(self, float_frame):
with np.errstate(all='ignore'):
Expand Down Expand Up @@ -948,12 +948,11 @@ def test_agg_multiple_mixed_no_warning(self):
expected = expected[['D', 'C', 'B', 'A']]
tm.assert_frame_equal(result, expected)

def test_agg_dict_nested_renaming_depr(self):

def test_agg_dict_nested_renaming_fail(self):
df = pd.DataFrame({'A': range(5), 'B': 5})
msg = "Using a dict with renaming is not allowed"

# nested renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with tm.assert_raises_regex(ValueError, msg):
df.agg({'A': {'foo': 'min'},
'B': {'bar': 'max'}})

Expand Down
46 changes: 0 additions & 46 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,49 +241,3 @@ def test_more_flexible_frame_multi_function(df):
expected = grouped.aggregate(OrderedDict([['C', np.mean],
['D', [np.mean, np.std]]]))
tm.assert_frame_equal(result, expected)

def foo(x):
return np.mean(x)

def bar(x):
return np.std(x, ddof=1)

# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
d = OrderedDict([['C', np.mean],
['D', OrderedDict([['foo', np.mean],
['bar', np.std]])]])
result = grouped.aggregate(d)

d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
expected = grouped.aggregate(d)

tm.assert_frame_equal(result, expected)


def test_multi_function_flexible_mix(df):
# GH #1268
grouped = df.groupby('A')

# Expected
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', {'sum': 'sum'}]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = grouped.aggregate(d)

# Test 1
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', 'sum']])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)

# Test 2
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
['D', ['sum']]])
# this uses column selection & renaming
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped.aggregate(d)
tm.assert_frame_equal(result, expected)
87 changes: 5 additions & 82 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,103 +195,26 @@ def test_aggregate_api_consistency():
expected = pd.concat([d_sum, c_mean], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)

result = grouped.agg({'C': ['mean', 'sum'],
'D': ['mean', 'sum']})
expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
expected.columns = MultiIndex.from_product([['C', 'D'],
['mean', 'sum']])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = grouped[['D', 'C']].agg({'r': np.sum,
'r2': np.mean})
expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
expected.columns = MultiIndex.from_product([['r', 'r2'],
['D', 'C']])
tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_dict_renaming_deprecation():
# 15931
df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
'B': range(5),
'C': range(5)})

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False) as w:
df.groupby('A').agg({'B': {'foo': ['sum', 'max']},
'C': {'bar': ['count', 'min']}})
assert "using a dict with renaming" in str(w[0].message)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
df.groupby('A')[['B', 'C']].agg({'ma': 'max'})

with tm.assert_produces_warning(FutureWarning) as w:
df.groupby('A').B.agg({'foo': 'count'})
assert "using a dict on a Series for aggregation" in str(w[0].message)


def test_agg_compat():
# GH 12334
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'two',
'two', 'two', 'one', 'two'],
'C': np.random.randn(8) + 1.0,
'D': np.arange(8)})

g = df.groupby(['A', 'B'])

expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
expected.columns = MultiIndex.from_tuples([('C', 'sum'),
('C', 'std')])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'C': ['sum', 'std']})
tm.assert_frame_equal(result, expected, check_like=True)

expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
expected.columns = ['C', 'D']

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'C': 'sum', 'D': 'std'})
tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_nested_dicts():
# API change for disallowing these types of nested dicts
# API change for disallowing these types of nested dicts.
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'two',
'two', 'two', 'one', 'two'],
'C': np.random.randn(8) + 1.0,
'D': np.arange(8)})

g = df.groupby(['A', 'B'])

msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
with tm.assert_raises_regex(SpecificationError, msg):
g.aggregate({'r1': {'C': ['mean', 'sum']},
'r2': {'D': ['mean', 'sum']}})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g.agg({'C': {'ra': ['mean', 'std']},
'D': {'rb': ['mean', 'std']}})
expected = pd.concat([g['C'].mean(), g['C'].std(),
g['D'].mean(), g['D'].std()],
axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[('ra', 'mean'), ('ra', 'std'),
('rb', 'mean'), ('rb', 'std')])
tm.assert_frame_equal(result, expected, check_like=True)

# same name as the original column
# GH9052
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
expected = expected.rename(columns={'result1': 'D'})

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = g['D'].agg({'D': np.sum, 'result2': np.mean})
tm.assert_frame_equal(result, expected, check_like=True)
msg = "Using a dict with renaming is not allowed"
with tm.assert_raises_regex(ValueError, msg):
g.agg({'C': {'ra': ['mean', 'std']},
'D': {'rb': ['mean', 'std']}})


def test_agg_item_by_item_raise_typeerror():
Expand Down
22 changes: 4 additions & 18 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ def test_basic(dtype):
check_index_type=False)

# complex agg
agged = grouped.aggregate([np.mean, np.std])
grouped.aggregate([np.mean, np.std])

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
agged = grouped.aggregate({'one': np.mean, 'two': np.std})
msg = "Using a dict with renaming is not allowed"
with tm.assert_raises_regex(ValueError, msg):
grouped.aggregate({'one': np.mean, 'two': np.std})

group_constants = {0: 10, 1: 20, 2: 30}
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
Expand Down Expand Up @@ -444,11 +444,6 @@ def test_frame_set_name_single(df):
result = grouped['C'].agg([np.mean, np.std])
assert result.index.name == 'A'

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
assert result.index.name == 'A'


def test_multi_func(df):
col1 = df['A']
Expand Down Expand Up @@ -553,15 +548,6 @@ def test_groupby_as_index_agg(df):
expected2['D'] = grouped.sum()['D']
assert_frame_equal(result2, expected2)

grouped = df.groupby('A', as_index=True)
expected3 = grouped['C'].sum()
expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result3 = grouped['C'].agg({'Q': np.sum})
assert_frame_equal(result3, expected3)

# multi-key

grouped = df.groupby(['A', 'B'], as_index=False)
Expand Down
46 changes: 15 additions & 31 deletions pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestSeriesApply():
class TestSeriesApply(object):

def test_apply(self, datetime_series):
with np.errstate(all='ignore'):
Expand Down Expand Up @@ -153,16 +153,17 @@ def f(x):
exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
tm.assert_series_equal(result, exp)

def test_apply_dict_depr(self):
def test_apply_dict_fail(self):
ts_df = pd.DataFrame(np.random.randn(10, 3),
columns=["A", "B", "C"],
index=pd.date_range("1/1/2000", periods=10))
msg = "Using a dict with renaming is not allowed"

tsdf = pd.DataFrame(np.random.randn(10, 3),
columns=['A', 'B', 'C'],
index=pd.date_range('1/1/2000', periods=10))
with tm.assert_produces_warning(FutureWarning):
tsdf.A.agg({'foo': ['sum', 'mean']})
with tm.assert_raises_regex(ValueError, msg):
ts_df.A.agg({'foo': ['sum', 'mean']})


class TestSeriesAggregate():
class TestSeriesAggregate(object):

def test_transform(self, string_series):
# transforming functions
Expand Down Expand Up @@ -245,29 +246,12 @@ def test_demo(self):
expected = Series([0], index=['foo'], name='series')
tm.assert_series_equal(result, expected)

# nested renaming
with tm.assert_produces_warning(FutureWarning):
result = s.agg({'foo': ['min', 'max']})
def test_multiple_agg_nested_rename_fail(self):
msg = "Using a dict with renaming is not allowed"
s = Series(range(6), dtype="int64", name="series")

expected = DataFrame(
{'foo': [0, 5]},
index=['min', 'max']).unstack().rename('series')
tm.assert_series_equal(result, expected)

def test_multiple_aggregators_with_dict_api(self):

s = Series(range(6), dtype='int64', name='series')
# nested renaming
with tm.assert_produces_warning(FutureWarning):
result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})

expected = DataFrame(
{'foo': [5.0, np.nan, 0.0, np.nan],
'bar': [np.nan, 2.5, np.nan, 15.0]},
columns=['foo', 'bar'],
index=['max', 'mean',
'min', 'sum']).unstack().rename('series')
tm.assert_series_equal(result.reindex_like(expected), expected)
with tm.assert_raises_regex(ValueError, msg):
s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
# test that we are evaluating row-by-row first
Expand Down Expand Up @@ -412,7 +396,7 @@ def test_agg_cython_table_raises(self, series, func, expected):
series.agg(func)


class TestSeriesMap():
class TestSeriesMap(object):

def test_map(self, datetime_series):
index, data = tm.getMixedTypeDict()
Expand Down
Loading