Skip to content

Commit 8130850

Browse files
authored
BUG: #57775 Fix groupby apply in case func returns None for all groups (#57800)
* Ensure that the empty frame has the information of the original frame
* Adjust test to expect DataFrame with columns
* Construct leaner dataframe
* Update doc
* Add example to doc
* Update whatsnew
* Add issue #; phrasing
* Fix doc
* Fix doc
* Fix docstring formatting
* move from 2.2.2 to 3.0.0
* remove description
* fix whitespace
1 parent f15f678 commit 8130850

File tree

4 files changed

+16
-3
lines changed

4 files changed

+16
-3
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ Bug fixes
289289
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
290290
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
291291
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
292+
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
292293
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
293294
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
294295

Diff for: pandas/core/groupby/generic.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1642,8 +1642,11 @@ def _wrap_applied_output(
16421642
first_not_none = next(com.not_none(*values), None)
16431643

16441644
if first_not_none is None:
1645-
# GH9684 - All values are None, return an empty frame.
1646-
return self.obj._constructor()
1645+
# GH9684 - All values are None, return an empty frame
1646+
# GH57775 - Ensure that columns and dtypes from original frame are kept.
1647+
result = self.obj._constructor(columns=data.columns)
1648+
result = result.astype(data.dtypes)
1649+
return result
16471650
elif isinstance(first_not_none, DataFrame):
16481651
return self._concat_objects(
16491652
values,

Diff for: pandas/core/groupby/groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16361636
a 5
16371637
b 2
16381638
dtype: int64
1639+
1640+
Example 4: The function passed to ``apply`` returns ``None`` for one of the
1641+
groups. This group is filtered from the result:
1642+
1643+
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
1644+
B C
1645+
0 1 4
1646+
1 2 6
16391647
"""
16401648
if isinstance(func, str):
16411649
if hasattr(self, func):

Diff for: pandas/tests/groupby/test_apply.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -838,7 +838,8 @@ def test_func(x):
838838
msg = "DataFrameGroupBy.apply operated on the grouping columns"
839839
with tm.assert_produces_warning(DeprecationWarning, match=msg):
840840
result = test_df.groupby("groups").apply(test_func)
841-
expected = DataFrame()
841+
expected = DataFrame(columns=test_df.columns)
842+
expected = expected.astype(test_df.dtypes)
842843
tm.assert_frame_equal(result, expected)
843844

844845

0 commit comments

Comments
 (0)