Skip to content

Commit 76854ce

Browse files
authored
DEPR: concat ignoring empty objects (#52532)
* DEPR: concat with empty objects * xfail on 32bit * missing reason * Fix AM build * post-merge fixup * catch more specifically * un-xfail * mypy fixup * update test * Fix broken test * remove duplicate whatsnew entries * remove unused
1 parent 4019d41 commit 76854ce

File tree

14 files changed

+99
-44
lines changed

14 files changed

+99
-44
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ Deprecations
253253
- Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`)
254254
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
255255
- Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`)
256+
- Deprecated :func:`concat` behavior when any of the objects being concatenated have length 0; in the past the dtypes of empty objects were ignored when determining the resulting dtype, but in a future version they will no longer be ignored (:issue:`39122`)
256257
- Deprecated :meth:`.DataFrameGroupBy.apply` and methods on the objects returned by :meth:`.DataFrameGroupBy.resample` operating on the grouping column(s); select the columns to operate on after groupby to either explicitly include or exclude the groupings and avoid the ``FutureWarning`` (:issue:`7155`)
257258
- Deprecated :meth:`.GroupBy.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
258259
- Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)

pandas/core/dtypes/concat.py

+13
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77
TYPE_CHECKING,
88
cast,
99
)
10+
import warnings
1011

1112
import numpy as np
1213

1314
from pandas._libs import lib
15+
from pandas.util._exceptions import find_stack_level
1416

1517
from pandas.core.dtypes.astype import astype_array
1618
from pandas.core.dtypes.cast import (
@@ -108,6 +110,17 @@ def concat_compat(
108110

109111
if len(to_concat) < len(orig):
110112
_, _, alt_dtype = _get_result_dtype(orig, non_empties)
113+
if alt_dtype != target_dtype:
114+
# GH#39122
115+
warnings.warn(
116+
"The behavior of array concatenation with empty entries is "
117+
"deprecated. In a future version, this will no longer exclude "
118+
"empty items when determining the result dtype. "
119+
"To retain the old behavior, exclude the empty entries before "
120+
"the concat operation.",
121+
FutureWarning,
122+
stacklevel=find_stack_level(),
123+
)
111124

112125
if target_dtype is not None:
113126
to_concat = [astype_array(arr, target_dtype, copy=False) for arr in to_concat]

pandas/core/internals/concat.py

+16-15
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,9 @@ def is_na(self) -> bool:
397397

398398
values = blk.values
399399
if values.size == 0:
400+
# GH#39122 this case will return False once deprecation is enforced
400401
return True
402+
401403
if isinstance(values.dtype, SparseDtype):
402404
return False
403405

@@ -416,16 +418,14 @@ def is_na(self) -> bool:
416418
return all(isna_all(row) for row in values)
417419

418420
@cache_readonly
419-
def is_na_without_isna_all(self) -> bool:
421+
def is_na_after_size_and_isna_all_deprecation(self) -> bool:
422+
"""
423+
Will self.is_na be True after values.size == 0 deprecation and isna_all
424+
deprecation are enforced?
425+
"""
420426
blk = self.block
421427
if blk.dtype.kind == "V":
422428
return True
423-
if not blk._can_hold_na:
424-
return False
425-
426-
values = blk.values
427-
if values.size == 0:
428-
return True
429429
return False
430430

431431
def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
@@ -487,17 +487,16 @@ def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike
487487

488488
if empty_dtype != empty_dtype_future:
489489
if empty_dtype == concat_values.dtype:
490-
# GH#40893
490+
# GH#39122, GH#40893
491491
warnings.warn(
492-
"The behavior of DataFrame concatenation with all-NA entries is "
493-
"deprecated. In a future version, this will no longer exclude "
494-
"all-NA columns when determining the result dtypes. "
495-
"To retain the old behavior, cast the all-NA columns to the "
496-
"desired dtype before the concat operation.",
492+
"The behavior of DataFrame concatenation with empty or all-NA "
493+
"entries is deprecated. In a future version, this will no longer "
494+
"exclude empty or all-NA columns when determining the result dtypes. "
495+
"To retain the old behavior, exclude the relevant entries before "
496+
"the concat operation.",
497497
FutureWarning,
498498
stacklevel=find_stack_level(),
499499
)
500-
501500
return concat_values
502501

503502

@@ -553,7 +552,9 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj
553552
dtype_future = dtype
554553
if len(dtypes) != len(join_units):
555554
dtypes_future = [
556-
unit.block.dtype for unit in join_units if not unit.is_na_without_isna_all
555+
unit.block.dtype
556+
for unit in join_units
557+
if not unit.is_na_after_size_and_isna_all_deprecation
557558
]
558559
if not len(dtypes_future):
559560
dtypes_future = [

pandas/tests/dtypes/test_concat.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ def test_concat_mismatched_categoricals_with_empty():
1212
ser1 = Series(["a", "b", "c"], dtype="category")
1313
ser2 = Series([], dtype="category")
1414

15-
result = _concat.concat_compat([ser1._values, ser2._values])
16-
expected = pd.concat([ser1, ser2])._values
15+
msg = "The behavior of array concatenation with empty entries is deprecated"
16+
with tm.assert_produces_warning(FutureWarning, match=msg):
17+
result = _concat.concat_compat([ser1._values, ser2._values])
18+
with tm.assert_produces_warning(FutureWarning, match=msg):
19+
expected = pd.concat([ser1, ser2])._values
1720
tm.assert_categorical_equal(result, expected)
1821

1922

pandas/tests/groupby/test_groupby.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,13 @@ def f3(x):
387387

388388
df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)})
389389

390+
depr_msg = "The behavior of array concatenation with empty entries is deprecated"
391+
390392
# correct result
391-
result1 = df.groupby("a").apply(f1)
392-
result2 = df2.groupby("a").apply(f1)
393+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
394+
result1 = df.groupby("a").apply(f1)
395+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
396+
result2 = df2.groupby("a").apply(f1)
393397
tm.assert_frame_equal(result1, result2)
394398

395399
# should fail (not the same number of levels)
@@ -403,7 +407,8 @@ def f3(x):
403407
with pytest.raises(AssertionError, match=msg):
404408
df.groupby("a").apply(f3)
405409
with pytest.raises(AssertionError, match=msg):
406-
df2.groupby("a").apply(f3)
410+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
411+
df2.groupby("a").apply(f3)
407412

408413

409414
def test_attr_wrapper(ts):

pandas/tests/indexes/test_base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,9 @@ def test_append_empty_preserve_name(self, name, expected):
613613
left = Index([], name="foo")
614614
right = Index([1, 2, 3], name=name)
615615

616-
result = left.append(right)
616+
msg = "The behavior of array concatenation with empty entries is deprecated"
617+
with tm.assert_produces_warning(FutureWarning, match=msg):
618+
result = left.append(right)
617619
assert result.name == expected
618620

619621
@pytest.mark.parametrize(

pandas/tests/reshape/concat/test_append.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,9 @@ def test_append_preserve_index_name(self):
162162
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
163163
df2 = df2.set_index(["A"])
164164

165-
result = df1._append(df2)
165+
msg = "The behavior of array concatenation with empty entries is deprecated"
166+
with tm.assert_produces_warning(FutureWarning, match=msg):
167+
result = df1._append(df2)
166168
assert result.index.name == "A"
167169

168170
indexes_can_append = [

pandas/tests/reshape/concat/test_append_common.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -695,11 +695,14 @@ def test_concat_categorical_empty(self):
695695
s1 = Series([], dtype="category")
696696
s2 = Series([1, 2], dtype="category")
697697

698-
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
699-
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
698+
msg = "The behavior of array concatenation with empty entries is deprecated"
699+
with tm.assert_produces_warning(FutureWarning, match=msg):
700+
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
701+
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
700702

701-
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
702-
tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
703+
with tm.assert_produces_warning(FutureWarning, match=msg):
704+
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
705+
tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
703706

704707
s1 = Series([], dtype="category")
705708
s2 = Series([], dtype="category")
@@ -721,11 +724,13 @@ def test_concat_categorical_empty(self):
721724

722725
# empty Series is ignored
723726
exp = Series([np.nan, np.nan])
724-
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
725-
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
727+
with tm.assert_produces_warning(FutureWarning, match=msg):
728+
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
729+
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
726730

727-
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
728-
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
731+
with tm.assert_produces_warning(FutureWarning, match=msg):
732+
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
733+
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
729734

730735
def test_categorical_concat_append(self):
731736
cat = Categorical(["a", "b"], categories=["a", "b"])

pandas/tests/reshape/concat/test_concat.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -756,8 +756,14 @@ def test_concat_ignore_empty_object_float(empty_dtype, df_dtype):
756756
df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype)
757757
empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype)
758758

759-
result = concat([empty, df])
760-
759+
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
760+
warn = None
761+
if df_dtype == "datetime64[ns]" or (
762+
df_dtype == "float64" and empty_dtype != "float64"
763+
):
764+
warn = FutureWarning
765+
with tm.assert_produces_warning(warn, match=msg):
766+
result = concat([empty, df])
761767
expected = df
762768
if df_dtype == "int64":
763769
# TODO what exact behaviour do we want for integer eventually?
@@ -782,7 +788,7 @@ def test_concat_ignore_all_na_object_float(empty_dtype, df_dtype):
782788
else:
783789
df_dtype = "float64"
784790

785-
msg = "The behavior of DataFrame concatenation with all-NA entries"
791+
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
786792
warn = None
787793
if empty_dtype != df_dtype and empty_dtype is not None:
788794
warn = FutureWarning
@@ -804,7 +810,7 @@ def test_concat_ignore_empty_from_reindex():
804810

805811
aligned = df2.reindex(columns=df1.columns)
806812

807-
msg = "The behavior of DataFrame concatenation with all-NA entries"
813+
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
808814
with tm.assert_produces_warning(FutureWarning, match=msg):
809815
result = concat([df1, aligned], ignore_index=True)
810816
expected = df1 = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]})

pandas/tests/reshape/concat/test_datetimes.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,9 @@ def test_concat_float_datetime64(using_array_manager):
578578

579579
if not using_array_manager:
580580
expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")})
581-
result = concat([df_time, df_float.iloc[:0]])
581+
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
582+
with tm.assert_produces_warning(FutureWarning, match=msg):
583+
result = concat([df_time, df_float.iloc[:0]])
582584
tm.assert_frame_equal(result, expected)
583585
else:
584586
expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype(

pandas/tests/reshape/concat/test_empty.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ def test_concat_empty_series(self):
5858

5959
s1 = Series([1, 2, 3], name="x")
6060
s2 = Series(name="y", dtype="float64")
61-
res = concat([s1, s2], axis=0)
61+
msg = "The behavior of array concatenation with empty entries is deprecated"
62+
with tm.assert_produces_warning(FutureWarning, match=msg):
63+
res = concat([s1, s2], axis=0)
6264
# name will be reset
6365
exp = Series([1, 2, 3])
6466
tm.assert_series_equal(res, exp)
@@ -238,9 +240,11 @@ def test_concat_inner_join_empty(self):
238240
df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
239241
df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64")
240242

241-
for how, expected in [("inner", df_expected), ("outer", df_a)]:
242-
result = concat([df_a, df_empty], axis=1, join=how)
243-
tm.assert_frame_equal(result, expected)
243+
result = concat([df_a, df_empty], axis=1, join="inner")
244+
tm.assert_frame_equal(result, df_expected)
245+
246+
result = concat([df_a, df_empty], axis=1, join="outer")
247+
tm.assert_frame_equal(result, df_a)
244248

245249
def test_empty_dtype_coerce(self):
246250
# xref to #12411

pandas/tests/reshape/concat/test_series.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ def test_concat_empty_and_non_empty_series_regression(self):
4040
s2 = Series([], dtype=object)
4141

4242
expected = s1
43-
result = concat([s1, s2])
43+
msg = "The behavior of array concatenation with empty entries is deprecated"
44+
with tm.assert_produces_warning(FutureWarning, match=msg):
45+
result = concat([s1, s2])
4446
tm.assert_series_equal(result, expected)
4547

4648
def test_concat_series_axis1(self):

pandas/tests/reshape/merge/test_merge.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,13 @@ def test_join_append_timedeltas(self, using_array_manager):
688688
{"d": [datetime(2013, 11, 5, 5, 56)], "t": [timedelta(0, 22500)]}
689689
)
690690
df = DataFrame(columns=list("dt"))
691-
df = concat([df, d], ignore_index=True)
692-
result = concat([df, d], ignore_index=True)
691+
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
692+
warn = FutureWarning
693+
if using_array_manager:
694+
warn = None
695+
with tm.assert_produces_warning(warn, match=msg):
696+
df = concat([df, d], ignore_index=True)
697+
result = concat([df, d], ignore_index=True)
693698
expected = DataFrame(
694699
{
695700
"d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)],

pandas/tests/series/methods/test_combine_first.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ def test_combine_first(self):
6363
# corner case
6464
ser = Series([1.0, 2, 3], index=[0, 1, 2])
6565
empty = Series([], index=[], dtype=object)
66-
result = ser.combine_first(empty)
66+
msg = "The behavior of array concatenation with empty entries is deprecated"
67+
with tm.assert_produces_warning(FutureWarning, match=msg):
68+
result = ser.combine_first(empty)
6769
ser.index = ser.index.astype("O")
6870
tm.assert_series_equal(ser, result)
6971

@@ -110,7 +112,9 @@ def test_combine_first_timezone_series_with_empty_series(self):
110112
)
111113
s1 = Series(range(10), index=time_index)
112114
s2 = Series(index=time_index)
113-
result = s1.combine_first(s2)
115+
msg = "The behavior of array concatenation with empty entries is deprecated"
116+
with tm.assert_produces_warning(FutureWarning, match=msg):
117+
result = s1.combine_first(s2)
114118
tm.assert_series_equal(result, s1)
115119

116120
def test_combine_first_preserves_dtype(self):

0 commit comments

Comments
 (0)