diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 0b7c2bac1be6a..7b094307cc728 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -59,6 +59,16 @@ Increased minimum version for Python pandas 2.3.0 supports Python 3.10 and higher. +.. _whatsnew_230.api_changes: + +API changes +~~~~~~~~~~~ + +- When enabling the ``future.infer_string`` option: Index set operations (like + union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or + empty ``Index`` with object dtype when determining the dtype of the resulting + Index (:issue:`60797`) + .. --------------------------------------------------------------------------- .. _whatsnew_230.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ad39907e7400e..58f3b37250eb6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6387,6 +6387,24 @@ def _find_common_type_compat(self, target) -> DtypeObj: """ target_dtype, _ = infer_dtype_from(target) + if using_string_dtype(): + # special case: if left or right is a zero-length RangeIndex or + # Index[object], those can be created by the default empty constructors + # -> for that case ignore this dtype and always return the other + # (https://github.com/pandas-dev/pandas/pull/60797) + from pandas.core.indexes.range import RangeIndex + + if len(self) == 0 and ( + isinstance(self, RangeIndex) or self.dtype == np.object_ + ): + return target_dtype + if ( + isinstance(target, Index) + and len(target) == 0 + and (isinstance(target, RangeIndex) or target_dtype == np.object_) + ): + return self.dtype + # special case: if one dtype is uint64 and the other a signed int, return object # See https://github.com/pandas-dev/pandas/issues/26778 for discussion # Now it's: @@ -7005,6 +7023,14 @@ def insert(self, loc: int, item) -> Index: arr = self._values + if using_string_dtype() and len(self) == 0 and self.dtype == np.object_: + # special case: if we are an empty object-dtype Index, also + # take into account the inserted item for the resulting dtype + # (https://github.com/pandas-dev/pandas/pull/60797) + dtype = self._find_common_type_compat(item) + if dtype != self.dtype: + return self.astype(dtype).insert(loc, item) + try: if isinstance(arr, ExtensionArray): res_values = arr.insert(loc, item) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 1509c47ba65c7..845174bbf600e 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Index, @@ -44,7 +42,6 @@ def test_constructor_single_row(self): ) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken") def test_constructor_list_of_series(self): data = [ OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index f7f7b2c7c872a..d1ee2fb1bd5ac 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -103,12 +103,7 @@ def test_26395(indexer_al): df["D"] = 0 indexer_al(df)["C", "D"] = 2 - expected = DataFrame( - {"D": [0, 0, 2]}, - index=["A", "B", "C"], - columns=pd.Index(["D"], dtype=object), - dtype=np.int64, - ) + expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) tm.assert_frame_equal(df, expected) with tm.assert_produces_warning( diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index a8249ed7f9828..93f4c2c6e3273 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1206,7 +1206,7 @@ def test_loc_setitem_datetimelike_with_inference(self): result = df.dtypes expected = Series( [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, - index=Index(list("ABCDEFGH"), dtype=object), + index=list("ABCDEFGH"), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 4cf297b4c037d..7e702bdc993bd 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -67,8 +67,7 @@ def test_insert_with_columns_dups(self): df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) exp = DataFrame( - [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], - columns=Index(["A", "A", "A"], dtype=object), + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] ) tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 190218a82d231..b3fe538d938f4 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -146,18 +146,32 @@ def test_setitem_different_dtype(self): ) tm.assert_series_equal(result, expected) - def test_setitem_empty_columns(self): - # GH 13522 + def test_setitem_overwrite_index(self): + # GH 13522 - assign the index as a column and then overwrite the values + # -> should not affect the index df = DataFrame(index=["A", "B", "C"]) df["X"] = df.index df["X"] = ["x", "y", "z"] exp = DataFrame( - data={"X": ["x", "y", "z"]}, - index=["A", "B", "C"], - columns=Index(["X"], dtype=object), + data={"X": ["x", "y", "z"]}, index=["A", "B", "C"], columns=["X"] ) tm.assert_frame_equal(df, exp) + def test_setitem_empty_columns(self): + # Starting from an empty DataFrame and setting a column should result + # in a default string dtype for the columns' Index + # https://github.com/pandas-dev/pandas/issues/60338 + + df = DataFrame() + df["foo"] = [1, 2, 3] + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=Index([])) + df["foo"] = [1, 2, 3] + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(df, expected) + def test_setitem_dt64_index_empty_columns(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") df = DataFrame(index=np.arange(len(rng))) @@ -171,9 +185,7 @@ def test_setitem_timestamp_empty_columns(self): df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns") expected = DataFrame( - [[Timestamp("20130101", tz="UTC")]] * 3, - index=range(3), - columns=Index(["now"], dtype=object), + [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"] ) tm.assert_frame_equal(df, expected) @@ -212,7 +224,7 @@ def test_setitem_period_preserves_dtype(self): result = DataFrame([]) result["a"] = data - expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object)) + expected = DataFrame({"a": data}, columns=["a"]) tm.assert_frame_equal(result, expected) @@ -939,7 +951,7 @@ def test_setitem_scalars_no_index(self): # GH#16823 / GH#17894 df = DataFrame() df["foo"] = 1 - expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64) + expected = DataFrame(columns=["foo"]).astype(np.int64) tm.assert_frame_equal(df, expected) def test_setitem_newcol_tuple_key(self, float_frame): diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 0d4a6a065111f..7899b4aeac3fd 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -182,12 +182,9 @@ def test_dropna_multiple_axes(self): with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - def test_dropna_tz_aware_datetime(self, using_infer_string): + def test_dropna_tz_aware_datetime(self): # GH13407 - df = DataFrame() - if using_infer_string: - df.columns = df.columns.astype("str") dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) df["Time"] = [dt1] diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 9e51ac0bc2612..e762c8ebdcd60 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -646,7 +644,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): tm.assert_frame_equal(res, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) - GH#60338") @pytest.mark.parametrize( "array, dtype", [ @@ -783,3 +780,34 @@ def test_reset_index_false_index_name(): result_frame.reset_index() expected_frame = DataFrame(range(5, 10), RangeIndex(range(5), name=False)) tm.assert_frame_equal(result_frame, expected_frame) + + +@pytest.mark.parametrize("columns", [None, Index([])]) +def test_reset_index_with_empty_frame(columns): + # Currently empty DataFrame has RangeIndex or object dtype Index, but when + # resetting the index we still want to end up with the default string dtype + # https://github.com/pandas-dev/pandas/issues/60338 + + index = Index([], name="foo") + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame(columns=["foo"]) + tm.assert_frame_equal(result, expected) + + index = Index([1, 2, 3], name="foo") + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([], names=["foo", "bar"]) + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame(columns=["foo", "bar"]) + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([(1, 2), (2, 3)], names=["foo", "bar"]) + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame({"foo": [1, 2], "bar": [2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f16068e0b6538..efc40536d56be 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -21,8 +21,6 @@ import pytest import pytz -from pandas._config import using_string_dtype - from pandas._libs import lib from pandas.compat.numpy import np_version_gt2 from pandas.errors import IntCastingNaNError @@ -2002,7 +2000,6 @@ def test_constructor_with_datetimes4(self): df = DataFrame({"value": dr}) assert str(df.iat[0, 0].tz) == "US/Eastern" - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_constructor_with_datetimes5(self): # GH 7822 # preserver an index with a tz on dict construction diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 27848e4d18596..ffabf238a4884 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -757,7 +757,6 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture): tm.assert_frame_equal(result, expected) expected = DataFrame(df_index) - expected.columns = expected.columns.astype(object) result = df.reset_index().query('"2018-01-03 00:00:00+00" < time') tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 07ddbc36b5ab0..7ebecdafdc8ae 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1623,7 +1623,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] tmp = d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) - tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"], dtype=object)) + tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py index b1a6c30b52f68..548f32fd53323 100644 --- a/pandas/tests/indexes/base_class/test_reshape.py +++ b/pandas/tests/indexes/base_class/test_reshape.py @@ -34,7 +34,7 @@ def test_insert(self): # test empty null_index = Index([]) - tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a")) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) def test_insert_missing(self, request, nulls_fixture, using_infer_string): if using_infer_string and nulls_fixture is pd.NA: diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index a897e5aca058a..3ef3f3ad4d3a2 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -240,7 +240,6 @@ def test_tuple_union_bug(self, method, expected, sort): def test_union_name_preservation( self, first_list, second_list, first_name, second_name, expected_name, sort ): - expected_dtype = object if not first_list or not second_list else "str" first = Index(first_list, name=first_name) second = Index(second_list, name=second_name) union = first.union(second, sort=sort) @@ -251,7 +250,7 @@ def test_union_name_preservation( expected = Index(sorted(vals), name=expected_name) tm.assert_index_equal(union, expected) else: - expected = Index(vals, name=expected_name, dtype=expected_dtype) + expected = Index(vals, name=expected_name) tm.assert_index_equal(union.sort_values(), expected.sort_values()) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index d0ac32939296c..abf6809d67f9c 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -70,13 +70,17 @@ def test_join_utc_convert(self, join_type): assert isinstance(result, DatetimeIndex) assert result.tz is timezone.utc - def test_datetimeindex_union_join_empty(self, sort): + def test_datetimeindex_union_join_empty(self, sort, using_infer_string): dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") empty = Index([]) result = dti.union(empty, sort=sort) - expected = dti.astype("O") - tm.assert_index_equal(result, expected) + if using_infer_string: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + else: + expected = dti.astype("O") + tm.assert_index_equal(result, expected) result = dti.join(empty) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 2f6bdb1fd8969..ae9b4e108448d 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -442,10 +442,12 @@ def test_insert_out_of_bounds(self, index, using_infer_string): else: msg = "slice indices must be integers or None or have an __index__ method" - if using_infer_string and ( - index.dtype == "string" or index.dtype == "category" # noqa: PLR1714 - ): - msg = "loc must be an integer between" + if using_infer_string: + if index.dtype == "string" or index.dtype == "category": # noqa: PLR1714 + msg = "loc must be an integer between" + elif index.dtype == "object" and len(index) == 0: + msg = "loc must be an integer between" + err = TypeError with pytest.raises(err, match=msg): index.insert(0.5, "foo") diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index f6a865ccbb3a0..b52d11d967b4a 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -524,7 +524,7 @@ def test_intersection_difference_match_empty(self, index, sort): @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_setop_with_categorical(index_flat, sort, method): +def test_setop_with_categorical(index_flat, sort, method, using_infer_string): # MultiIndex tested separately in tests.indexes.multi.test_setops index = index_flat @@ -533,10 +533,22 @@ def test_setop_with_categorical(index_flat, sort, method): result = getattr(index, method)(other, sort=sort) expected = getattr(index, method)(index, sort=sort) + if ( + using_infer_string + and index.empty + and method in ("union", "symmetric_difference") + ): + expected = expected.astype("category") tm.assert_index_equal(result, expected, exact=exact) result = getattr(index, method)(other[:5], sort=sort) expected = getattr(index, method)(index[:5], sort=sort) + if ( + using_infer_string + and index.empty + and method in ("union", "symmetric_difference") + ): + expected = expected.astype("category") tm.assert_index_equal(result, expected, exact=exact) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index d78694018749c..7504c984794e8 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -13,7 +13,6 @@ CategoricalIndex, DataFrame, DatetimeIndex, - Index, MultiIndex, Series, Timestamp, @@ -71,11 +70,7 @@ def test_at_setitem_item_cache_cleared(self): df.at[0, "x"] = 4 df.at[0, "cost"] = 789 - expected = DataFrame( - {"x": [4], "cost": 789}, - index=[0], - columns=Index(["x", "cost"], dtype=object), - ) + expected = DataFrame({"x": [4], "cost": 789}, index=[0]) tm.assert_frame_equal(df, expected) # And in particular, check that the _item_cache has updated correctly. diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index dc4f159cfd3c3..bb22a6d403086 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -808,9 +808,9 @@ def test_loc_setitem_empty_frame(self): # is inplace, so that dtype is retained sera = Series(val1, index=keys1, dtype=np.float64) serb = Series(val2, index=keys2) - expected = DataFrame( - {"A": sera, "B": serb}, columns=Index(["A", "B"], dtype=object) - ).reindex(index=index) + expected = DataFrame({"A": sera, "B": serb}, columns=Index(["A", "B"])).reindex( + index=index + ) tm.assert_frame_equal(df, expected) def test_loc_setitem_frame(self): @@ -1008,7 +1008,7 @@ def test_setitem_new_key_tz(self, indexer_sl): to_datetime(42).tz_localize("UTC"), to_datetime(666).tz_localize("UTC"), ] - expected = Series(vals, index=Index(["foo", "bar"], dtype=object)) + expected = Series(vals, index=Index(["foo", "bar"])) ser = Series(dtype=object) indexer_sl(ser)["foo"] = vals[0] @@ -2050,15 +2050,11 @@ def test_loc_setitem_empty_series_str_idx(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc["foo"] = 1 - tm.assert_series_equal(ser, Series([1], index=Index(["foo"], dtype=object))) + tm.assert_series_equal(ser, Series([1], index=Index(["foo"]))) ser.loc["bar"] = 3 - tm.assert_series_equal( - ser, Series([1, 3], index=Index(["foo", "bar"], dtype=object)) - ) + tm.assert_series_equal(ser, Series([1, 3], index=Index(["foo", "bar"]))) ser.loc[3] = 4 - tm.assert_series_equal( - ser, Series([1, 3, 4], index=Index(["foo", "bar", 3], dtype=object)) - ) + tm.assert_series_equal(ser, Series([1, 3, 4], index=Index(["foo", "bar", 3]))) def test_loc_setitem_incremental_with_dst(self): # GH#20724 @@ -2080,18 +2076,26 @@ def test_loc_setitem_incremental_with_dst(self): ], ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"], ) - def test_loc_setitem_datetime_keys_cast(self, conv): - # GH#9516 + def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string): + # GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert dt1 = Timestamp("20130101 09:00:00") dt2 = Timestamp("20130101 10:00:00") df = DataFrame() df.loc[conv(dt1), "one"] = 100 df.loc[conv(dt2), "one"] = 200 + # the dtype constructed by Index([..]) does not yet follow the unit + # of the input on 2.3.x -> so checking this is datetime64, but then + # specifying the exact dtype in the expected result + if using_infer_string: + assert df.index.dtype.kind == "M" + exp_dtype = df.index.dtype + else: + exp_dtype = "datetime64[ns]" expected = DataFrame( {"one": [100.0, 200.0]}, - index=[dt1, dt2], - columns=Index(["one"], dtype=object), + index=Index([dt1, dt2], dtype=exp_dtype), + columns=Index(["one"]), ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 5fcb71d0186a6..e3246fd3c2a59 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -30,7 +30,7 @@ def test_empty_frame_setitem_index_name_retained(self): expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index"), - columns=Index(["series"], dtype=object), + columns=Index(["series"]), ) tm.assert_frame_equal(df, expected) @@ -43,7 +43,7 @@ def test_empty_frame_setitem_index_name_inherited(self): expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index"), - columns=Index(["series"], dtype=object), + columns=Index(["series"]), ) tm.assert_frame_equal(df, expected) @@ -96,9 +96,7 @@ def test_partial_set_empty_frame2(self): # these work as they don't really change # anything but the index # GH#5632 - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="object") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="object")) df = DataFrame(index=Index([], dtype="object")) df["foo"] = Series([], dtype="object") @@ -116,9 +114,7 @@ def test_partial_set_empty_frame2(self): tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame3(self): - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="int64") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") df = DataFrame(index=Index([], dtype="int64")) @@ -135,9 +131,7 @@ def test_partial_set_empty_frame4(self): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = range(len(df)) - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="int64") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) # range is int-dtype-like, so we get int64 dtype expected["foo"] = expected["foo"].astype("int64") tm.assert_frame_equal(df, expected) @@ -210,7 +204,7 @@ def test_partial_set_empty_frame_empty_copy_assignment(self): df = DataFrame(index=[0]) df = df.copy() df["a"] = 0 - expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object)) + expected = DataFrame(0, index=[0], columns=Index(["a"])) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 9ab7dff64b182..3b62f7c4c0549 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -272,25 +272,17 @@ def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) s.loc["B"] = timedelta(1) - expected = Series( - Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object) - ) + expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["B"]) tm.assert_series_equal(s, expected) s = s.reindex(s.index.insert(0, "A")) expected = Series( - [np.nan, Timedelta("1 days")], - dtype="timedelta64[ns]", - index=Index(["A", "B"], dtype=object), + [np.nan, Timedelta("1 days")], dtype="timedelta64[ns]", index=["A", "B"] ) tm.assert_series_equal(s, expected) s.loc["A"] = timedelta(1) - expected = Series( - Timedelta("1 days"), - dtype="timedelta64[ns]", - index=Index(["A", "B"], dtype=object), - ) + expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["A", "B"]) tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 85558e85494eb..ad2c5a27bea9e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -567,10 +567,7 @@ def test_setitem_with_expansion_type_promotion(self): ser["a"] = Timestamp("2016-01-01") ser["b"] = 3.0 ser["c"] = "foo" - expected = Series( - [Timestamp("2016-01-01"), 3.0, "foo"], - index=Index(["a", "b", "c"], dtype=object), - ) + expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) tm.assert_series_equal(ser, expected) def test_setitem_not_contained(self, string_series): diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index e1ec8afda33a9..9ad83b7570290 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -31,7 +31,7 @@ def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name - def test_combine_first(self): + def test_combine_first(self, using_infer_string): values = np.arange(20, dtype=np.float64) series = Series(values, index=np.arange(20, dtype=np.int64)) @@ -66,7 +66,8 @@ def test_combine_first(self): msg = "The behavior of array concatenation with empty entries is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): result = ser.combine_first(empty) - ser.index = ser.index.astype("O") + if not using_infer_string: + ser.index = ser.index.astype("O") tm.assert_series_equal(ser, result) def test_combine_first_dt64(self, unit):