From 55f59e97cb1f48cd19e11314555783ae3dcf935b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Feb 2025 10:15:41 +0100 Subject: [PATCH 1/4] API: ignore empty range/object dtype in Index setop operations (string dtype compat) (#60797) (cherry picked from commit ee06e714fcb35e0b6d321b3edd454eb0e363e5e4) --- doc/source/whatsnew/v2.3.0.rst | 10 + doc/source/whatsnew/v3.0.0.rst | 841 ++++++++++++++++++ pandas/core/indexes/base.py | 26 + .../frame/constructors/test_from_dict.py | 3 - pandas/tests/frame/indexing/test_coercion.py | 7 +- pandas/tests/frame/indexing/test_indexing.py | 2 +- pandas/tests/frame/indexing/test_insert.py | 3 +- pandas/tests/frame/indexing/test_setitem.py | 32 +- pandas/tests/frame/methods/test_dropna.py | 5 +- .../tests/frame/methods/test_reset_index.py | 34 +- pandas/tests/frame/test_constructors.py | 3 - pandas/tests/frame/test_query_eval.py | 1 - pandas/tests/groupby/test_groupby.py | 2 +- .../tests/indexes/base_class/test_reshape.py | 2 +- .../tests/indexes/base_class/test_setops.py | 3 +- pandas/tests/indexes/datetimes/test_join.py | 10 +- pandas/tests/indexes/test_old_base.py | 10 +- pandas/tests/indexes/test_setops.py | 14 +- pandas/tests/indexing/test_at.py | 7 +- pandas/tests/indexing/test_loc.py | 28 +- pandas/tests/indexing/test_partial.py | 18 +- pandas/tests/series/indexing/test_indexing.py | 14 +- pandas/tests/series/indexing/test_setitem.py | 5 +- .../series/methods/test_combine_first.py | 5 +- 24 files changed, 990 insertions(+), 95 deletions(-) create mode 100644 doc/source/whatsnew/v3.0.0.rst diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 0b7c2bac1be6a..7b094307cc728 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -59,6 +59,16 @@ Increased minimum version for Python pandas 2.3.0 supports Python 3.10 and higher. +.. _whatsnew_230.api_changes: + +API changes +~~~~~~~~~~~ + +- When enabling the ``future.infer_string`` option: Index set operations (like + union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or + empty ``Index`` with object dtype when determining the dtype of the resulting + Index (:issue:`60797`) + .. --------------------------------------------------------------------------- .. _whatsnew_230.deprecations: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst new file mode 100644 index 0000000000000..64e4a30453366 --- /dev/null +++ b/doc/source/whatsnew/v3.0.0.rst @@ -0,0 +1,841 @@ +.. _whatsnew_300: + +What's new in 3.0.0 (Month XX, 2024) +------------------------------------ + +These are the changes in pandas 3.0.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_300.enhancements.enhancement1: + +Enhancement1 +^^^^^^^^^^^^ + +.. _whatsnew_300.enhancements.enhancement2: + +Enhancement2 +^^^^^^^^^^^^ + +.. _whatsnew_300.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ +- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) +- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) +- :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface `_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`) +- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) +- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) +- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` +- :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`) +- :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`) +- :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`) +- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) +- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). +- :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) +- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) +- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) +- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) +- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) +- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) +- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) +- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) +- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :class:`Rolling` and :class:`Expanding` now support ``pipe`` method (:issue:`57076`) +- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`) +- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) +- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) +- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) +- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) +- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) +- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) +- :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`) +- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) +- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) +- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) +- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) +- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) +- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) +- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) +- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) +- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) +- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) +- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) +- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) +- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) +- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) +- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) +- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) +- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) +- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) +- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +.. _whatsnew_300.notable_bug_fixes.groupby_unobs_and_na: + +Improved behavior in groupby for ``observed=False`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A number of bugs have been fixed due to improved handling of unobserved groups (:issue:`55738`). All remarks in this section equally impact :class:`.SeriesGroupBy`. + +In previous versions of pandas, a single grouping with :meth:`.DataFrameGroupBy.apply` or :meth:`.DataFrameGroupBy.agg` would pass the unobserved groups to the provided function, resulting in ``0`` below. + +.. ipython:: python + + df = pd.DataFrame( + { + "key1": pd.Categorical(list("aabb"), categories=list("abc")), + "key2": [1, 1, 1, 2], + "values": [1, 2, 3, 4], + } + ) + df + gb = df.groupby("key1", observed=False) + gb[["values"]].apply(lambda x: x.sum()) + +However this was not the case when using multiple groupings, resulting in ``NaN`` below. + +.. code-block:: ipython + + In [1]: gb = df.groupby(["key1", "key2"], observed=False) + In [2]: gb[["values"]].apply(lambda x: x.sum()) + Out[2]: + values + key1 key2 + a 1 3.0 + 2 NaN + b 1 3.0 + 2 4.0 + c 1 NaN + 2 NaN + +Now using multiple groupings will also pass the unobserved groups to the provided function. + +.. ipython:: python + + gb = df.groupby(["key1", "key2"], observed=False) + gb[["values"]].apply(lambda x: x.sum()) + +Similarly: + +- In previous versions of pandas the method :meth:`.DataFrameGroupBy.sum` would result in ``0`` for unobserved groups, but :meth:`.DataFrameGroupBy.prod`, :meth:`.DataFrameGroupBy.all`, and :meth:`.DataFrameGroupBy.any` would all result in NA values. Now these methods result in ``1``, ``True``, and ``False`` respectively. +- :meth:`.DataFrameGroupBy.groups` did not include unobserved groups and now does. + +These improvements also fixed certain bugs in groupby: + +- :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) +- :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) +- :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) +- :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) +- :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) + +.. _whatsnew_300.notable_bug_fixes.notable_bug_fix2: + +notable_bug_fix2 +^^^^^^^^^^^^^^^^ + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_300.api_breaking.datetime_resolution_inference: + +Datetime resolution inference +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Converting a sequence of strings, ``datetime`` objects, or ``np.datetime64`` objects to +a ``datetime64`` dtype now performs inference on the appropriate resolution (AKA unit) for the output dtype. This affects :class:`Series`, :class:`DataFrame`, :class:`Index`, :class:`DatetimeIndex`, and :func:`to_datetime`. + +Previously, these would always give nanosecond resolution: + +.. code-block:: ipython + + In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime() + In [2]: pd.to_datetime([dt]).dtype + Out[2]: dtype('`_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. + ++------------------------+---------------------+ +| Package | New Minimum Version | ++========================+=====================+ +| pytz | 2023.4 | ++------------------------+---------------------+ +| fastparquet | 2023.10.0 | ++------------------------+---------------------+ +| adbc-driver-postgresql | 0.10.0 | ++------------------------+---------------------+ +| mypy (dev) | 1.9.0 | ++------------------------+---------------------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_300.api_breaking.pytz: + +``pytz`` now an optional dependency +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now uses :py:mod:`zoneinfo` from the standard library as the default timezone implementation when passing a timezone +string to various methods. (:issue:`34916`) + +*Old behavior:* + +.. code-block:: ipython + + In [1]: ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") + In [2]: ts.tz + + +*New behavior:* + +.. ipython:: python + + ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") + ts.tz + +``pytz`` timezone objects are still supported when passed directly, but they will no longer be returned by default +from string inputs. Moreover, ``pytz`` is no longer a required dependency of pandas, but can be installed +with the pip extra ``pip install pandas[timezone]``. + + +Additionally, pandas no longer throws ``pytz`` exceptions for timezone operations leading to ambiguous or nonexistent +times. These cases will now raise a ``ValueError``. + +.. _whatsnew_300.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ +- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) +- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`) +- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`) +- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) +- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`) +- Removed :meth:`Index.sort` which always raised a ``TypeError``. This attribute is not defined and will raise an ``AttributeError`` (:issue:`59283`) +- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) +- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) +- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) +- when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`) +- Index set operations (like union or intersection) will now ignore the dtype of + an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining + the dtype of the resulting Index (:issue:`60797`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.deprecations: + +Deprecations +~~~~~~~~~~~~ + +Copy keyword +^^^^^^^^^^^^ + +The ``copy`` keyword argument in the following methods is deprecated and +will be removed in a future version: + +- :meth:`DataFrame.truncate` / :meth:`Series.truncate` +- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert` +- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize` +- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` +- :meth:`DataFrame.align` / :meth:`Series.align` +- :meth:`DataFrame.astype` / :meth:`Series.astype` +- :meth:`DataFrame.reindex` / :meth:`Series.reindex` +- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` +- :meth:`DataFrame.set_axis` / :meth:`Series.set_axis` +- :meth:`DataFrame.to_period` / :meth:`Series.to_period` +- :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp` +- :meth:`DataFrame.rename` / :meth:`Series.rename` +- :meth:`DataFrame.transpose` +- :meth:`DataFrame.swaplevel` +- :meth:`DataFrame.merge` / :func:`pd.merge` + +Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until +necessary. Use ``.copy`` to trigger an eager copy. The copy keyword has no effect +starting with 3.0, so it can be safely removed from your code. + +Other Deprecations +^^^^^^^^^^^^^^^^^^ + +- Deprecated :func:`core.internals.api.make_block`, use public APIs instead (:issue:`56815`) +- Deprecated :meth:`.DataFrameGroupby.corrwith` (:issue:`57158`) +- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) +- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) +- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`) +- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) +- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`) +- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`) +- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) +- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) +- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) +- Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) +- Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) +- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enforced deprecation of aliases ``M``, ``Q``, ``Y``, etc. in favour of ``ME``, ``QE``, ``YE``, etc. for offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Renamed the following offset aliases (:issue:`57986`): + ++-------------------------------+------------------+------------------+ +| offset | removed alias | new alias | ++===============================+==================+==================+ +|:class:`MonthEnd` | ``M`` | ``ME`` | ++-------------------------------+------------------+------------------+ +|:class:`BusinessMonthEnd` | ``BM`` | ``BME`` | ++-------------------------------+------------------+------------------+ +|:class:`SemiMonthEnd` | ``SM`` | ``SME`` | ++-------------------------------+------------------+------------------+ +|:class:`CustomBusinessMonthEnd`| ``CBM`` | ``CBME`` | ++-------------------------------+------------------+------------------+ +|:class:`QuarterEnd` | ``Q`` | ``QE`` | ++-------------------------------+------------------+------------------+ +|:class:`BQuarterEnd` | ``BQ`` | ``BQE`` | ++-------------------------------+------------------+------------------+ +|:class:`YearEnd` | ``Y`` | ``YE`` | ++-------------------------------+------------------+------------------+ +|:class:`BYearEnd` | ``BY`` | ``BYE`` | ++-------------------------------+------------------+------------------+ + +Other Removals +^^^^^^^^^^^^^^ +- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) +- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`) +- :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`) +- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) +- :func:`to_datetime` with a ``unit`` specified no longer parses strings into floats, instead parses them the same way as without ``unit`` (:issue:`50735`) +- :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`) +- :meth:`ExtensionArray._reduce` now requires a ``keepdims: bool = False`` parameter in the signature (:issue:`52788`) +- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) +- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) +- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) +- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) +- Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`) +- Changed behavior of :meth:`Series.__getitem__`, :meth:`Series.__setitem__`, :meth:`DataFrame.__getitem__`, :meth:`DataFrame.__setitem__` with an integer slice on objects with a floating-dtype index. This is now treated as *positional* indexing (:issue:`49612`) +- Disallow a callable argument to :meth:`Series.iloc` to return a ``tuple`` (:issue:`53769`) +- Disallow allowing logical operations (``||``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`) +- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``||``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`) +- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`) +- Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`) +- Disallow indexing an :class:`Index` with a boolean indexer of length zero, it now raises ``ValueError`` (:issue:`55820`) +- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`) +- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`) +- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`) +- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) +- Removed 'fastpath' keyword in :class:`Categorical` constructor (:issue:`20110`) +- Removed 'kind' keyword in :meth:`Series.resample` and :meth:`DataFrame.resample` (:issue:`58125`) +- Removed ``Block``, ``DatetimeTZBlock``, ``ExtensionBlock``, ``create_block_manager_from_blocks`` from ``pandas.core.internals`` and ``pandas.core.internals.api`` (:issue:`55139`) +- Removed alias :class:`arrays.PandasArray` for :class:`arrays.NumpyExtensionArray` (:issue:`53694`) +- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) +- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`) +- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`) +- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`) +- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`) +- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`) +- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`) +- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`) +- Stopped performing dtype inference with in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`) +- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`) +- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) +- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) +- Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`) +- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) +- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) +- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 `_) +- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) +- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) +- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) +- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) +- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) +- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) +- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`) +- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) +- Enforced deprecation of ``core.internals`` member ``DatetimeTZBlock`` (:issue:`58467`) +- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`) +- Enforced deprecation of ``keep_date_col`` keyword in :func:`read_csv` (:issue:`55569`) +- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`) +- Enforced deprecation of argument ``infer_datetime_format`` in :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) +- Enforced deprecation of combining parsed datetime columns in :func:`read_csv` in ``parse_dates`` (:issue:`55569`) +- Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) +- Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) +- Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) +- Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) +- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) +- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) +- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`) +- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting frequencies in :class:`Hour`, :class:`BusinessHour`, :class:`CustomBusinessHour` (:issue:`59143`) +- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting units in :class:`Timedelta` (:issue:`59143`) +- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`) +- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`) +- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`) +- Enforced deprecation of the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype` that would introduce new categories. (:issue:`58270`) +- Enforced deprecation of the behavior of :meth:`Series.argsort` in the presence of NA values (:issue:`58232`) +- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`) +- Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`) +- Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) +- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) +- Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`) +- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) +- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`) +- Removed :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` supporting bool dtype (:issue:`53975`) +- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`) +- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`) +- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`) +- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`) +- Removed ``DataFrame.swapaxes`` and ``Series.swapaxes`` (:issue:`51946`) +- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) +- Removed ``DataFrameGroupby.fillna`` and ``SeriesGroupBy.fillna``` (:issue:`55719`) +- Removed ``Index.format``, use :meth:`Index.astype` with ``str`` or :meth:`Index.map` with a ``formatter`` function instead (:issue:`55439`) +- Removed ``Resample.fillna`` (:issue:`55719`) +- Removed ``Series.__int__`` and ``Series.__float__``. Call ``int(Series.iloc[0])`` or ``float(Series.iloc[0])`` instead. (:issue:`51131`) +- Removed ``Series.ravel`` (:issue:`56053`) +- Removed ``Series.view`` (:issue:`56054`) +- Removed ``StataReader.close`` (:issue:`49228`) +- Removed ``_data`` from :class:`DataFrame`, :class:`Series`, :class:`.arrays.ArrowExtensionArray` (:issue:`52003`) +- Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) +- Removed ``axis`` argument from all groupby operations (:issue:`50405`) +- Removed ``convert_dtype`` from :meth:`Series.apply` (:issue:`52257`) +- Removed ``method``, ``limit`` ``fill_axis`` and ``broadcast_axis`` keywords from :meth:`DataFrame.align` (:issue:`51968`) +- Removed ``pandas.api.types.is_interval`` and ``pandas.api.types.is_period``, use ``isinstance(obj, pd.Interval)`` and ``isinstance(obj, pd.Period)`` instead (:issue:`55264`) +- Removed ``pandas.io.sql.execute`` (:issue:`50185`) +- Removed ``pandas.value_counts``, use :meth:`Series.value_counts` instead (:issue:`53493`) +- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`) +- Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`) +- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`) +- Removed argument ``limit`` from :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`; the argument ``method`` must be set to ``None`` and will be removed in a future version of pandas (:issue:`53520`) +- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) +- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`) +- Removed deprecated keyword ``method`` on :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`57760`) +- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) +- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records`(:issue:`51697`) +- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`) +- Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`) +- Removed the ``ArrayManager`` (:issue:`55043`) +- Removed the ``fastpath`` argument from the :class:`Series` constructor (:issue:`55466`) +- Removed the ``is_boolean``, ``is_integer``, ``is_floating``, ``holds_integer``, ``is_numeric``, ``is_categorical``, ``is_object``, and ``is_interval`` attributes of :class:`Index` (:issue:`50042`) +- Removed the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`) +- Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) +- Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) +- Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`) +- Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`) +- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) +- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) +- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`) +- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Eliminated circular reference in to original pandas object in accessor attributes (e.g. :attr:`Series.str`). However, accessor instantiation is no longer cached (:issue:`47667`, :issue:`41357`) +- :attr:`Categorical.categories` returns a :class:`RangeIndex` columns instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`) +- :class:`DataFrame` returns a :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`) +- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`58118`) +- :func:`concat` returns a :class:`RangeIndex` column when possible when ``objs`` contains :class:`Series` and :class:`DataFrame` and ``axis=0`` (:issue:`58119`) +- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) +- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) +- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) +- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`) +- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) +- Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names` doesn't invalidate all cached operations (:issue:`59578`) +- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) +- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) +- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) +- Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`) +- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`) +- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) +- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) +- Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`) +- Performance improvement in :meth:`MultiIndex._engine` to use smaller dtypes if possible (:issue:`58411`) +- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) +- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`) +- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) +- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) +- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`) +- Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`) +- Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`) +- Performance improvement in :meth:`RangeIndex.searchsorted` (:issue:`58376`) +- Performance improvement in :meth:`RangeIndex.to_numpy` when specifying an ``na_value`` (:issue:`58376`) +- Performance improvement in :meth:`RangeIndex.value_counts` (:issue:`58376`) +- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) +- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`) +- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`) +- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`) +- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`) +- Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`) +- Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) +- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) +- Performance improvement in indexing operations for string dtypes (:issue:`56997`) +- Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) +- + +Datetimelike +^^^^^^^^^^^^ +- Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`) +- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) +- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`) +- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) +- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`) +- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) +- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`) +- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`) +- Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) +- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) +- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) +- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`) +- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) +- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`) +- Bug in :meth:`to_datetime` on float32 df with year, month, day etc. columns leads to precision issues and incorrect result. (:issue:`60506`) +- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) +- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) +- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) + +Timedelta +^^^^^^^^^ +- Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`) +- Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) + +Timezones +^^^^^^^^^ +- +- + +Numeric +^^^^^^^ +- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) +- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) + +Conversion +^^^^^^^^^^ +- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) +- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) +- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) +- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) + +Strings +^^^^^^^ +- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`) +- + +Interval +^^^^^^^^ +- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`) +- Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`) +- + +Indexing +^^^^^^^^ +- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) +- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) +- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) +- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) + +Missing +^^^^^^^ +- Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`) +- + +MultiIndex +^^^^^^^^^^ +- :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) +- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) +- :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) +- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) +- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) +- + +I/O +^^^ +- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`) +- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) +- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) +- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) +- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) +- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) +- Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`) +- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) +- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`) +- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) +- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) +- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) +- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) +- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) +- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`) +- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`) +- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`) +- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`) +- Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`) +- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) +- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) +- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) +- Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) +- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) + +Period +^^^^^^ +- Fixed error message when passing invalid period alias to :meth:`PeriodIndex.to_timestamp` (:issue:`58974`) +- + +Plotting +^^^^^^^^ +- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) +- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) +- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) +- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`.DataFrameGroupBy.__len__` and :meth:`.SeriesGroupBy.__len__` would raise when the grouping contained NA values and ``dropna=False`` (:issue:`58644`) +- Bug in :meth:`.DataFrameGroupBy.any` that returned True for groups where all Timedelta values are NaT. (:issue:`59712`) +- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) +- Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) +- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) +- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) +- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) +- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) +- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) +- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) +- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) +- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) +- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) +- Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) +- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) +- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) +- Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`) + +Reshaping +^^^^^^^^^ +- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) +- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) +- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) +- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) +- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) +- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) +- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`) +- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) +- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) +- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) + +Sparse +^^^^^^ +- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) +- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`) +- Bug in :meth:`DataFrame.sparse.to_dense` which ignored subclassing and always returned an instance of :class:`DataFrame` (:issue:`59913`) + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) +- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) +- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) +- Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) +- Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) + +Styler +^^^^^^ +- Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed. + +Other +^^^^^ +- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) +- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) +- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) +- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) +- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`) +- Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) +- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) +- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) +- Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`) +- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`) +- Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`) +- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) +- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) +- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) +- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`) +- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) +- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) +- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) +- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) +- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) +- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) +- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) +- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`) +- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) +- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) +- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) + +.. ***DO NOT USE THIS SECTION*** + +- +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_300.contributors: + +Contributors +~~~~~~~~~~~~ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ad39907e7400e..58f3b37250eb6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6387,6 +6387,24 @@ def _find_common_type_compat(self, target) -> DtypeObj: """ target_dtype, _ = infer_dtype_from(target) + if using_string_dtype(): + # special case: if left or right is a zero-length RangeIndex or + # Index[object], those can be created by the default empty constructors + # -> for that case ignore this dtype and always return the other + # (https://github.com/pandas-dev/pandas/pull/60797) + from pandas.core.indexes.range import RangeIndex + + if len(self) == 0 and ( + isinstance(self, RangeIndex) or self.dtype == np.object_ + ): + return target_dtype + if ( + isinstance(target, Index) + and len(target) == 0 + and (isinstance(target, RangeIndex) or target_dtype == np.object_) + ): + return self.dtype + # special case: if one dtype is uint64 and the other a signed int, return object # See https://github.com/pandas-dev/pandas/issues/26778 for discussion # Now it's: @@ -7005,6 +7023,14 @@ def insert(self, loc: int, item) -> Index: arr = self._values + if using_string_dtype() and len(self) == 0 and self.dtype == np.object_: + # special case: if we are an empty object-dtype Index, also + # take into account the inserted item for the resulting dtype + # (https://github.com/pandas-dev/pandas/pull/60797) + dtype = self._find_common_type_compat(item) + if dtype != self.dtype: + return self.astype(dtype).insert(loc, item) + try: if isinstance(arr, ExtensionArray): res_values = arr.insert(loc, item) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 1509c47ba65c7..845174bbf600e 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Index, @@ -44,7 +42,6 @@ def test_constructor_single_row(self): ) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken") def test_constructor_list_of_series(self): data = [ OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index f7f7b2c7c872a..d1ee2fb1bd5ac 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -103,12 +103,7 @@ def test_26395(indexer_al): df["D"] = 0 indexer_al(df)["C", "D"] = 2 - expected = DataFrame( - {"D": [0, 0, 2]}, - index=["A", "B", "C"], - columns=pd.Index(["D"], dtype=object), - dtype=np.int64, - ) + expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) tm.assert_frame_equal(df, expected) with tm.assert_produces_warning( diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index a8249ed7f9828..93f4c2c6e3273 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1206,7 +1206,7 @@ def test_loc_setitem_datetimelike_with_inference(self): result = df.dtypes expected = Series( [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, - index=Index(list("ABCDEFGH"), dtype=object), + index=list("ABCDEFGH"), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 4cf297b4c037d..7e702bdc993bd 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -67,8 +67,7 @@ def test_insert_with_columns_dups(self): df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) exp = DataFrame( - [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], - columns=Index(["A", "A", "A"], dtype=object), + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] ) tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 190218a82d231..b3fe538d938f4 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -146,18 +146,32 @@ def test_setitem_different_dtype(self): ) tm.assert_series_equal(result, expected) - def test_setitem_empty_columns(self): - # GH 13522 + def test_setitem_overwrite_index(self): + # GH 13522 - assign the index as a column and then overwrite the values + # -> should not affect the index df = DataFrame(index=["A", "B", "C"]) df["X"] = df.index df["X"] = ["x", "y", "z"] exp = DataFrame( - data={"X": ["x", "y", "z"]}, - index=["A", "B", "C"], - columns=Index(["X"], dtype=object), + data={"X": ["x", "y", "z"]}, index=["A", "B", "C"], columns=["X"] ) tm.assert_frame_equal(df, exp) + def test_setitem_empty_columns(self): + # Starting from an empty DataFrame and setting a column should result + # in a default string dtype for the columns' Index + # https://github.com/pandas-dev/pandas/issues/60338 + + df = DataFrame() + df["foo"] = [1, 2, 3] + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=Index([])) + df["foo"] = [1, 2, 3] + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(df, expected) + def test_setitem_dt64_index_empty_columns(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") df = DataFrame(index=np.arange(len(rng))) @@ -171,9 +185,7 @@ def test_setitem_timestamp_empty_columns(self): df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns") expected = DataFrame( - [[Timestamp("20130101", tz="UTC")]] * 3, - index=range(3), - columns=Index(["now"], dtype=object), + [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"] ) tm.assert_frame_equal(df, expected) @@ -212,7 +224,7 @@ def test_setitem_period_preserves_dtype(self): result = DataFrame([]) result["a"] = data - expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object)) + expected = DataFrame({"a": data}, columns=["a"]) tm.assert_frame_equal(result, expected) @@ -939,7 +951,7 @@ def test_setitem_scalars_no_index(self): # GH#16823 / GH#17894 df = DataFrame() df["foo"] = 1 - expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64) + expected = DataFrame(columns=["foo"]).astype(np.int64) tm.assert_frame_equal(df, expected) def test_setitem_newcol_tuple_key(self, float_frame): diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 0d4a6a065111f..7899b4aeac3fd 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -182,12 +182,9 @@ def test_dropna_multiple_axes(self): with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - def test_dropna_tz_aware_datetime(self, using_infer_string): + def test_dropna_tz_aware_datetime(self): # GH13407 - df = DataFrame() - if using_infer_string: - df.columns = df.columns.astype("str") dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) df["Time"] = [dt1] diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 9e51ac0bc2612..e762c8ebdcd60 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -646,7 +644,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): tm.assert_frame_equal(res, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) - GH#60338") @pytest.mark.parametrize( "array, dtype", [ @@ -783,3 +780,34 @@ def test_reset_index_false_index_name(): result_frame.reset_index() expected_frame = DataFrame(range(5, 10), RangeIndex(range(5), name=False)) tm.assert_frame_equal(result_frame, expected_frame) + + +@pytest.mark.parametrize("columns", [None, Index([])]) +def test_reset_index_with_empty_frame(columns): + # Currently empty DataFrame has RangeIndex or object dtype Index, but when + # resetting the index we still want to end up with the default string dtype + # https://github.com/pandas-dev/pandas/issues/60338 + + index = Index([], name="foo") + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame(columns=["foo"]) + tm.assert_frame_equal(result, expected) + + index = Index([1, 2, 3], name="foo") + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame({"foo": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([], names=["foo", "bar"]) + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame(columns=["foo", "bar"]) + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([(1, 2), (2, 3)], names=["foo", "bar"]) + df = DataFrame(index=index, columns=columns) + result = df.reset_index() + expected = DataFrame({"foo": [1, 2], "bar": [2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f16068e0b6538..efc40536d56be 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -21,8 +21,6 @@ import pytest import pytz -from pandas._config import using_string_dtype - from pandas._libs import lib from pandas.compat.numpy import np_version_gt2 from pandas.errors import IntCastingNaNError @@ -2002,7 +2000,6 @@ def test_constructor_with_datetimes4(self): df = DataFrame({"value": dr}) assert str(df.iat[0, 0].tz) == "US/Eastern" - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_constructor_with_datetimes5(self): # GH 7822 # preserver an index with a tz on dict construction diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 27848e4d18596..ffabf238a4884 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -757,7 +757,6 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture): tm.assert_frame_equal(result, expected) expected = DataFrame(df_index) - expected.columns = expected.columns.astype(object) result = df.reset_index().query('"2018-01-03 00:00:00+00" < time') tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 07ddbc36b5ab0..7ebecdafdc8ae 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1623,7 +1623,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] tmp = d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) - tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"], dtype=object)) + tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py index b1a6c30b52f68..548f32fd53323 100644 --- a/pandas/tests/indexes/base_class/test_reshape.py +++ b/pandas/tests/indexes/base_class/test_reshape.py @@ -34,7 +34,7 @@ def test_insert(self): # test empty null_index = Index([]) - tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a")) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) def test_insert_missing(self, request, nulls_fixture, using_infer_string): if using_infer_string and nulls_fixture is pd.NA: diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index a897e5aca058a..3ef3f3ad4d3a2 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -240,7 +240,6 @@ def test_tuple_union_bug(self, method, expected, sort): def test_union_name_preservation( self, first_list, second_list, first_name, second_name, expected_name, sort ): - expected_dtype = object if not first_list or not second_list else "str" first = Index(first_list, name=first_name) second = Index(second_list, name=second_name) union = first.union(second, sort=sort) @@ -251,7 +250,7 @@ def test_union_name_preservation( expected = Index(sorted(vals), name=expected_name) tm.assert_index_equal(union, expected) else: - expected = Index(vals, name=expected_name, dtype=expected_dtype) + expected = Index(vals, name=expected_name) tm.assert_index_equal(union.sort_values(), expected.sort_values()) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index d0ac32939296c..abf6809d67f9c 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -70,13 +70,17 @@ def test_join_utc_convert(self, join_type): assert isinstance(result, DatetimeIndex) assert result.tz is timezone.utc - def test_datetimeindex_union_join_empty(self, sort): + def test_datetimeindex_union_join_empty(self, sort, using_infer_string): dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") empty = Index([]) result = dti.union(empty, sort=sort) - expected = dti.astype("O") - tm.assert_index_equal(result, expected) + if using_infer_string: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + else: + expected = dti.astype("O") + tm.assert_index_equal(result, expected) result = dti.join(empty) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 2f6bdb1fd8969..ae9b4e108448d 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -442,10 +442,12 @@ def test_insert_out_of_bounds(self, index, using_infer_string): else: msg = "slice indices must be integers or None or have an __index__ method" - if using_infer_string and ( - index.dtype == "string" or index.dtype == "category" # noqa: PLR1714 - ): - msg = "loc must be an integer between" + if using_infer_string: + if index.dtype == "string" or index.dtype == "category": # noqa: PLR1714 + msg = "loc must be an integer between" + elif index.dtype == "object" and len(index) == 0: + msg = "loc must be an integer between" + err = TypeError with pytest.raises(err, match=msg): index.insert(0.5, "foo") diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index f6a865ccbb3a0..b52d11d967b4a 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -524,7 +524,7 @@ def test_intersection_difference_match_empty(self, index, sort): @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_setop_with_categorical(index_flat, sort, method): +def test_setop_with_categorical(index_flat, sort, method, using_infer_string): # MultiIndex tested separately in tests.indexes.multi.test_setops index = index_flat @@ -533,10 +533,22 @@ def test_setop_with_categorical(index_flat, sort, method): result = getattr(index, method)(other, sort=sort) expected = getattr(index, method)(index, sort=sort) + if ( + using_infer_string + and index.empty + and method in ("union", "symmetric_difference") + ): + expected = expected.astype("category") tm.assert_index_equal(result, expected, exact=exact) result = getattr(index, method)(other[:5], sort=sort) expected = getattr(index, method)(index[:5], sort=sort) + if ( + using_infer_string + and index.empty + and method in ("union", "symmetric_difference") + ): + expected = expected.astype("category") tm.assert_index_equal(result, expected, exact=exact) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index d78694018749c..7504c984794e8 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -13,7 +13,6 @@ CategoricalIndex, DataFrame, DatetimeIndex, - Index, MultiIndex, Series, Timestamp, @@ -71,11 +70,7 @@ def test_at_setitem_item_cache_cleared(self): df.at[0, "x"] = 4 df.at[0, "cost"] = 789 - expected = DataFrame( - {"x": [4], "cost": 789}, - index=[0], - columns=Index(["x", "cost"], dtype=object), - ) + expected = DataFrame({"x": [4], "cost": 789}, index=[0]) tm.assert_frame_equal(df, expected) # And in particular, check that the _item_cache has updated correctly. diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index dc4f159cfd3c3..ddcce4553ba37 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -808,9 +808,9 @@ def test_loc_setitem_empty_frame(self): # is inplace, so that dtype is retained sera = Series(val1, index=keys1, dtype=np.float64) serb = Series(val2, index=keys2) - expected = DataFrame( - {"A": sera, "B": serb}, columns=Index(["A", "B"], dtype=object) - ).reindex(index=index) + expected = DataFrame({"A": sera, "B": serb}, columns=Index(["A", "B"])).reindex( + index=index + ) tm.assert_frame_equal(df, expected) def test_loc_setitem_frame(self): @@ -1008,7 +1008,7 @@ def test_setitem_new_key_tz(self, indexer_sl): to_datetime(42).tz_localize("UTC"), to_datetime(666).tz_localize("UTC"), ] - expected = Series(vals, index=Index(["foo", "bar"], dtype=object)) + expected = Series(vals, index=Index(["foo", "bar"])) ser = Series(dtype=object) indexer_sl(ser)["foo"] = vals[0] @@ -2050,15 +2050,11 @@ def test_loc_setitem_empty_series_str_idx(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc["foo"] = 1 - tm.assert_series_equal(ser, Series([1], index=Index(["foo"], dtype=object))) + tm.assert_series_equal(ser, Series([1], index=Index(["foo"]))) ser.loc["bar"] = 3 - tm.assert_series_equal( - ser, Series([1, 3], index=Index(["foo", "bar"], dtype=object)) - ) + tm.assert_series_equal(ser, Series([1, 3], index=Index(["foo", "bar"]))) ser.loc[3] = 4 - tm.assert_series_equal( - ser, Series([1, 3, 4], index=Index(["foo", "bar", 3], dtype=object)) - ) + tm.assert_series_equal(ser, Series([1, 3, 4], index=Index(["foo", "bar", 3]))) def test_loc_setitem_incremental_with_dst(self): # GH#20724 @@ -2080,8 +2076,8 @@ def test_loc_setitem_incremental_with_dst(self): ], ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"], ) - def test_loc_setitem_datetime_keys_cast(self, conv): - # GH#9516 + def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string): + # GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert dt1 = Timestamp("20130101 09:00:00") dt2 = Timestamp("20130101 10:00:00") df = DataFrame() @@ -2090,8 +2086,10 @@ def test_loc_setitem_datetime_keys_cast(self, conv): expected = DataFrame( {"one": [100.0, 200.0]}, - index=[dt1, dt2], - columns=Index(["one"], dtype=object), + index=Index( + [conv(dt1), conv(dt2)], dtype=None if using_infer_string else object + ), + columns=Index(["one"]), ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 5fcb71d0186a6..e3246fd3c2a59 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -30,7 +30,7 @@ def test_empty_frame_setitem_index_name_retained(self): expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index"), - columns=Index(["series"], dtype=object), + columns=Index(["series"]), ) tm.assert_frame_equal(df, expected) @@ -43,7 +43,7 @@ def test_empty_frame_setitem_index_name_inherited(self): expected = DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index"), - columns=Index(["series"], dtype=object), + columns=Index(["series"]), ) tm.assert_frame_equal(df, expected) @@ -96,9 +96,7 @@ def test_partial_set_empty_frame2(self): # these work as they don't really change # anything but the index # GH#5632 - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="object") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="object")) df = DataFrame(index=Index([], dtype="object")) df["foo"] = Series([], dtype="object") @@ -116,9 +114,7 @@ def test_partial_set_empty_frame2(self): tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame3(self): - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="int64") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") df = DataFrame(index=Index([], dtype="int64")) @@ -135,9 +131,7 @@ def test_partial_set_empty_frame4(self): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = range(len(df)) - expected = DataFrame( - columns=Index(["foo"], dtype=object), index=Index([], dtype="int64") - ) + expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) # range is int-dtype-like, so we get int64 dtype expected["foo"] = expected["foo"].astype("int64") tm.assert_frame_equal(df, expected) @@ -210,7 +204,7 @@ def test_partial_set_empty_frame_empty_copy_assignment(self): df = DataFrame(index=[0]) df = df.copy() df["a"] = 0 - expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object)) + expected = DataFrame(0, index=[0], columns=Index(["a"])) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 9ab7dff64b182..3b62f7c4c0549 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -272,25 +272,17 @@ def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) s.loc["B"] = timedelta(1) - expected = Series( - Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object) - ) + expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["B"]) tm.assert_series_equal(s, expected) s = s.reindex(s.index.insert(0, "A")) expected = Series( - [np.nan, Timedelta("1 days")], - dtype="timedelta64[ns]", - index=Index(["A", "B"], dtype=object), + [np.nan, Timedelta("1 days")], dtype="timedelta64[ns]", index=["A", "B"] ) tm.assert_series_equal(s, expected) s.loc["A"] = timedelta(1) - expected = Series( - Timedelta("1 days"), - dtype="timedelta64[ns]", - index=Index(["A", "B"], dtype=object), - ) + expected = Series(Timedelta("1 days"), dtype="timedelta64[ns]", index=["A", "B"]) tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 85558e85494eb..ad2c5a27bea9e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -567,10 +567,7 @@ def test_setitem_with_expansion_type_promotion(self): ser["a"] = Timestamp("2016-01-01") ser["b"] = 3.0 ser["c"] = "foo" - expected = Series( - [Timestamp("2016-01-01"), 3.0, "foo"], - index=Index(["a", "b", "c"], dtype=object), - ) + expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) tm.assert_series_equal(ser, expected) def test_setitem_not_contained(self, string_series): diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index e1ec8afda33a9..9ad83b7570290 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -31,7 +31,7 @@ def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name - def test_combine_first(self): + def test_combine_first(self, using_infer_string): values = np.arange(20, dtype=np.float64) series = Series(values, index=np.arange(20, dtype=np.int64)) @@ -66,7 +66,8 @@ def test_combine_first(self): msg = "The behavior of array concatenation with empty entries is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): result = ser.combine_first(empty) - ser.index = ser.index.astype("O") + if not using_infer_string: + ser.index = ser.index.astype("O") tm.assert_series_equal(ser, result) def test_combine_first_dt64(self, unit): From 5645c3ea95a6c3ac2931fc551c3a823a10fd7377 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Feb 2025 10:38:31 +0100 Subject: [PATCH 2/4] fixup backpor --- doc/source/whatsnew/v3.0.0.rst | 841 --------------------------------- 1 file changed, 841 deletions(-) delete mode 100644 doc/source/whatsnew/v3.0.0.rst diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst deleted file mode 100644 index 64e4a30453366..0000000000000 --- a/doc/source/whatsnew/v3.0.0.rst +++ /dev/null @@ -1,841 +0,0 @@ -.. _whatsnew_300: - -What's new in 3.0.0 (Month XX, 2024) ------------------------------------- - -These are the changes in pandas 3.0.0. See :ref:`release` for a full changelog -including other versions of pandas. - -{{ header }} - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.enhancements: - -Enhancements -~~~~~~~~~~~~ - -.. _whatsnew_300.enhancements.enhancement1: - -Enhancement1 -^^^^^^^^^^^^ - -.. _whatsnew_300.enhancements.enhancement2: - -Enhancement2 -^^^^^^^^^^^^ - -.. _whatsnew_300.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ -- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) -- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) -- :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface `_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`) -- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) -- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) -- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` -- :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) -- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`) -- :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`) -- :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`) -- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) -- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). -- :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) -- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) -- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) -- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) -- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) -- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) -- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) -- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) -- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) -- :class:`Rolling` and :class:`Expanding` now support ``pipe`` method (:issue:`57076`) -- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`) -- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) -- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) -- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) -- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) -- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) -- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) -- :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`) -- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) -- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) -- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) -- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) -- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) -- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) -- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) -- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) -- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) -- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) -- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) -- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) -- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) -- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) -- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) -- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) -- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) -- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) -- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) -- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.notable_bug_fixes: - -Notable bug fixes -~~~~~~~~~~~~~~~~~ - -These are bug fixes that might have notable behavior changes. - -.. _whatsnew_300.notable_bug_fixes.groupby_unobs_and_na: - -Improved behavior in groupby for ``observed=False`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A number of bugs have been fixed due to improved handling of unobserved groups (:issue:`55738`). All remarks in this section equally impact :class:`.SeriesGroupBy`. - -In previous versions of pandas, a single grouping with :meth:`.DataFrameGroupBy.apply` or :meth:`.DataFrameGroupBy.agg` would pass the unobserved groups to the provided function, resulting in ``0`` below. - -.. ipython:: python - - df = pd.DataFrame( - { - "key1": pd.Categorical(list("aabb"), categories=list("abc")), - "key2": [1, 1, 1, 2], - "values": [1, 2, 3, 4], - } - ) - df - gb = df.groupby("key1", observed=False) - gb[["values"]].apply(lambda x: x.sum()) - -However this was not the case when using multiple groupings, resulting in ``NaN`` below. - -.. code-block:: ipython - - In [1]: gb = df.groupby(["key1", "key2"], observed=False) - In [2]: gb[["values"]].apply(lambda x: x.sum()) - Out[2]: - values - key1 key2 - a 1 3.0 - 2 NaN - b 1 3.0 - 2 4.0 - c 1 NaN - 2 NaN - -Now using multiple groupings will also pass the unobserved groups to the provided function. - -.. ipython:: python - - gb = df.groupby(["key1", "key2"], observed=False) - gb[["values"]].apply(lambda x: x.sum()) - -Similarly: - -- In previous versions of pandas the method :meth:`.DataFrameGroupBy.sum` would result in ``0`` for unobserved groups, but :meth:`.DataFrameGroupBy.prod`, :meth:`.DataFrameGroupBy.all`, and :meth:`.DataFrameGroupBy.any` would all result in NA values. Now these methods result in ``1``, ``True``, and ``False`` respectively. -- :meth:`.DataFrameGroupBy.groups` did not include unobserved groups and now does. - -These improvements also fixed certain bugs in groupby: - -- :meth:`.DataFrameGroupBy.agg` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`36698`) -- :meth:`.DataFrameGroupBy.groups` with ``sort=False`` would sort groups; they now occur in the order they are observed (:issue:`56966`) -- :meth:`.DataFrameGroupBy.nunique` would fail when there are multiple groupings, unobserved groups, and ``as_index=False`` (:issue:`52848`) -- :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) -- :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) - -.. _whatsnew_300.notable_bug_fixes.notable_bug_fix2: - -notable_bug_fix2 -^^^^^^^^^^^^^^^^ - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_300.api_breaking.datetime_resolution_inference: - -Datetime resolution inference -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Converting a sequence of strings, ``datetime`` objects, or ``np.datetime64`` objects to -a ``datetime64`` dtype now performs inference on the appropriate resolution (AKA unit) for the output dtype. This affects :class:`Series`, :class:`DataFrame`, :class:`Index`, :class:`DatetimeIndex`, and :func:`to_datetime`. - -Previously, these would always give nanosecond resolution: - -.. code-block:: ipython - - In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime() - In [2]: pd.to_datetime([dt]).dtype - Out[2]: dtype('`_ the general recommendation is to use the latest version. -The following table lists the lowest version per library that is currently being tested throughout the development of pandas. -Optional libraries below the lowest tested version may still work, but are not considered supported. - -+------------------------+---------------------+ -| Package | New Minimum Version | -+========================+=====================+ -| pytz | 2023.4 | -+------------------------+---------------------+ -| fastparquet | 2023.10.0 | -+------------------------+---------------------+ -| adbc-driver-postgresql | 0.10.0 | -+------------------------+---------------------+ -| mypy (dev) | 1.9.0 | -+------------------------+---------------------+ - -See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. - -.. _whatsnew_300.api_breaking.pytz: - -``pytz`` now an optional dependency -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -pandas now uses :py:mod:`zoneinfo` from the standard library as the default timezone implementation when passing a timezone -string to various methods. (:issue:`34916`) - -*Old behavior:* - -.. code-block:: ipython - - In [1]: ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") - In [2]: ts.tz - - -*New behavior:* - -.. ipython:: python - - ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") - ts.tz - -``pytz`` timezone objects are still supported when passed directly, but they will no longer be returned by default -from string inputs. Moreover, ``pytz`` is no longer a required dependency of pandas, but can be installed -with the pip extra ``pip install pandas[timezone]``. - - -Additionally, pandas no longer throws ``pytz`` exceptions for timezone operations leading to ambiguous or nonexistent -times. These cases will now raise a ``ValueError``. - -.. _whatsnew_300.api_breaking.other: - -Other API changes -^^^^^^^^^^^^^^^^^ -- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) -- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`) -- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`) -- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) -- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`) -- Removed :meth:`Index.sort` which always raised a ``TypeError``. This attribute is not defined and will raise an ``AttributeError`` (:issue:`59283`) -- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) -- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) -- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) -- when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`) -- Index set operations (like union or intersection) will now ignore the dtype of - an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining - the dtype of the resulting Index (:issue:`60797`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.deprecations: - -Deprecations -~~~~~~~~~~~~ - -Copy keyword -^^^^^^^^^^^^ - -The ``copy`` keyword argument in the following methods is deprecated and -will be removed in a future version: - -- :meth:`DataFrame.truncate` / :meth:`Series.truncate` -- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert` -- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize` -- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` -- :meth:`DataFrame.align` / :meth:`Series.align` -- :meth:`DataFrame.astype` / :meth:`Series.astype` -- :meth:`DataFrame.reindex` / :meth:`Series.reindex` -- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` -- :meth:`DataFrame.set_axis` / :meth:`Series.set_axis` -- :meth:`DataFrame.to_period` / :meth:`Series.to_period` -- :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp` -- :meth:`DataFrame.rename` / :meth:`Series.rename` -- :meth:`DataFrame.transpose` -- :meth:`DataFrame.swaplevel` -- :meth:`DataFrame.merge` / :func:`pd.merge` - -Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until -necessary. Use ``.copy`` to trigger an eager copy. The copy keyword has no effect -starting with 3.0, so it can be safely removed from your code. - -Other Deprecations -^^^^^^^^^^^^^^^^^^ - -- Deprecated :func:`core.internals.api.make_block`, use public APIs instead (:issue:`56815`) -- Deprecated :meth:`.DataFrameGroupby.corrwith` (:issue:`57158`) -- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) -- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) -- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`) -- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) -- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`) -- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`) -- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) -- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) -- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) -- Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) -- Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) -- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.prior_deprecations: - -Removal of prior version deprecations/changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Enforced deprecation of aliases ``M``, ``Q``, ``Y``, etc. in favour of ``ME``, ``QE``, ``YE``, etc. for offsets -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Renamed the following offset aliases (:issue:`57986`): - -+-------------------------------+------------------+------------------+ -| offset | removed alias | new alias | -+===============================+==================+==================+ -|:class:`MonthEnd` | ``M`` | ``ME`` | -+-------------------------------+------------------+------------------+ -|:class:`BusinessMonthEnd` | ``BM`` | ``BME`` | -+-------------------------------+------------------+------------------+ -|:class:`SemiMonthEnd` | ``SM`` | ``SME`` | -+-------------------------------+------------------+------------------+ -|:class:`CustomBusinessMonthEnd`| ``CBM`` | ``CBME`` | -+-------------------------------+------------------+------------------+ -|:class:`QuarterEnd` | ``Q`` | ``QE`` | -+-------------------------------+------------------+------------------+ -|:class:`BQuarterEnd` | ``BQ`` | ``BQE`` | -+-------------------------------+------------------+------------------+ -|:class:`YearEnd` | ``Y`` | ``YE`` | -+-------------------------------+------------------+------------------+ -|:class:`BYearEnd` | ``BY`` | ``BYE`` | -+-------------------------------+------------------+------------------+ - -Other Removals -^^^^^^^^^^^^^^ -- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) -- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`) -- :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`) -- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) -- :func:`to_datetime` with a ``unit`` specified no longer parses strings into floats, instead parses them the same way as without ``unit`` (:issue:`50735`) -- :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`) -- :meth:`ExtensionArray._reduce` now requires a ``keepdims: bool = False`` parameter in the signature (:issue:`52788`) -- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) -- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) -- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) -- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) -- Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`) -- Changed behavior of :meth:`Series.__getitem__`, :meth:`Series.__setitem__`, :meth:`DataFrame.__getitem__`, :meth:`DataFrame.__setitem__` with an integer slice on objects with a floating-dtype index. This is now treated as *positional* indexing (:issue:`49612`) -- Disallow a callable argument to :meth:`Series.iloc` to return a ``tuple`` (:issue:`53769`) -- Disallow allowing logical operations (``||``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`) -- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``||``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`) -- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`) -- Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`) -- Disallow indexing an :class:`Index` with a boolean indexer of length zero, it now raises ``ValueError`` (:issue:`55820`) -- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`) -- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`) -- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`) -- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) -- Removed 'fastpath' keyword in :class:`Categorical` constructor (:issue:`20110`) -- Removed 'kind' keyword in :meth:`Series.resample` and :meth:`DataFrame.resample` (:issue:`58125`) -- Removed ``Block``, ``DatetimeTZBlock``, ``ExtensionBlock``, ``create_block_manager_from_blocks`` from ``pandas.core.internals`` and ``pandas.core.internals.api`` (:issue:`55139`) -- Removed alias :class:`arrays.PandasArray` for :class:`arrays.NumpyExtensionArray` (:issue:`53694`) -- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) -- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`) -- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`) -- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`) -- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`) -- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`) -- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`) -- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`) -- Stopped performing dtype inference with in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`) -- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`) -- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) -- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) -- Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`) -- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) -- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) -- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 `_) -- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) -- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) -- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) -- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) -- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) -- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) -- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`) -- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) -- Enforced deprecation of ``core.internals`` member ``DatetimeTZBlock`` (:issue:`58467`) -- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`) -- Enforced deprecation of ``keep_date_col`` keyword in :func:`read_csv` (:issue:`55569`) -- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`) -- Enforced deprecation of argument ``infer_datetime_format`` in :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) -- Enforced deprecation of combining parsed datetime columns in :func:`read_csv` in ``parse_dates`` (:issue:`55569`) -- Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) -- Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) -- Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) -- Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) -- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) -- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) -- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`) -- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting frequencies in :class:`Hour`, :class:`BusinessHour`, :class:`CustomBusinessHour` (:issue:`59143`) -- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting units in :class:`Timedelta` (:issue:`59143`) -- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`) -- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`) -- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`) -- Enforced deprecation of the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype` that would introduce new categories. (:issue:`58270`) -- Enforced deprecation of the behavior of :meth:`Series.argsort` in the presence of NA values (:issue:`58232`) -- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`) -- Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`) -- Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) -- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) -- Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`) -- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) -- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`) -- Removed :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` supporting bool dtype (:issue:`53975`) -- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`) -- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`) -- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`) -- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`) -- Removed ``DataFrame.swapaxes`` and ``Series.swapaxes`` (:issue:`51946`) -- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) -- Removed ``DataFrameGroupby.fillna`` and ``SeriesGroupBy.fillna``` (:issue:`55719`) -- Removed ``Index.format``, use :meth:`Index.astype` with ``str`` or :meth:`Index.map` with a ``formatter`` function instead (:issue:`55439`) -- Removed ``Resample.fillna`` (:issue:`55719`) -- Removed ``Series.__int__`` and ``Series.__float__``. Call ``int(Series.iloc[0])`` or ``float(Series.iloc[0])`` instead. (:issue:`51131`) -- Removed ``Series.ravel`` (:issue:`56053`) -- Removed ``Series.view`` (:issue:`56054`) -- Removed ``StataReader.close`` (:issue:`49228`) -- Removed ``_data`` from :class:`DataFrame`, :class:`Series`, :class:`.arrays.ArrowExtensionArray` (:issue:`52003`) -- Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) -- Removed ``axis`` argument from all groupby operations (:issue:`50405`) -- Removed ``convert_dtype`` from :meth:`Series.apply` (:issue:`52257`) -- Removed ``method``, ``limit`` ``fill_axis`` and ``broadcast_axis`` keywords from :meth:`DataFrame.align` (:issue:`51968`) -- Removed ``pandas.api.types.is_interval`` and ``pandas.api.types.is_period``, use ``isinstance(obj, pd.Interval)`` and ``isinstance(obj, pd.Period)`` instead (:issue:`55264`) -- Removed ``pandas.io.sql.execute`` (:issue:`50185`) -- Removed ``pandas.value_counts``, use :meth:`Series.value_counts` instead (:issue:`53493`) -- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`) -- Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`) -- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`) -- Removed argument ``limit`` from :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`; the argument ``method`` must be set to ``None`` and will be removed in a future version of pandas (:issue:`53520`) -- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) -- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`) -- Removed deprecated keyword ``method`` on :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`57760`) -- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) -- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records`(:issue:`51697`) -- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`) -- Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`) -- Removed the ``ArrayManager`` (:issue:`55043`) -- Removed the ``fastpath`` argument from the :class:`Series` constructor (:issue:`55466`) -- Removed the ``is_boolean``, ``is_integer``, ``is_floating``, ``holds_integer``, ``is_numeric``, ``is_categorical``, ``is_object``, and ``is_interval`` attributes of :class:`Index` (:issue:`50042`) -- Removed the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`) -- Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) -- Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) -- Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`) -- Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`) -- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) -- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) -- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`) -- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.performance: - -Performance improvements -~~~~~~~~~~~~~~~~~~~~~~~~ -- Eliminated circular reference in to original pandas object in accessor attributes (e.g. :attr:`Series.str`). However, accessor instantiation is no longer cached (:issue:`47667`, :issue:`41357`) -- :attr:`Categorical.categories` returns a :class:`RangeIndex` columns instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`) -- :class:`DataFrame` returns a :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`) -- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`58118`) -- :func:`concat` returns a :class:`RangeIndex` column when possible when ``objs`` contains :class:`Series` and :class:`DataFrame` and ``axis=0`` (:issue:`58119`) -- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) -- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) -- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) -- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`) -- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) -- Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names` doesn't invalidate all cached operations (:issue:`59578`) -- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) -- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) -- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) -- Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`) -- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`) -- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) -- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) -- Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`) -- Performance improvement in :meth:`MultiIndex._engine` to use smaller dtypes if possible (:issue:`58411`) -- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) -- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`) -- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) -- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) -- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`) -- Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`) -- Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`) -- Performance improvement in :meth:`RangeIndex.searchsorted` (:issue:`58376`) -- Performance improvement in :meth:`RangeIndex.to_numpy` when specifying an ``na_value`` (:issue:`58376`) -- Performance improvement in :meth:`RangeIndex.value_counts` (:issue:`58376`) -- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`) -- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`) -- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`) -- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`) -- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`) -- Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`) -- Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) -- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) -- Performance improvement in indexing operations for string dtypes (:issue:`56997`) -- Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.bug_fixes: - -Bug fixes -~~~~~~~~~ - -Categorical -^^^^^^^^^^^ -- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) -- - -Datetimelike -^^^^^^^^^^^^ -- Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`) -- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) -- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`) -- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) -- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`) -- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) -- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`) -- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`) -- Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) -- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) -- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) -- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`) -- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) -- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`) -- Bug in :meth:`to_datetime` on float32 df with year, month, day etc. columns leads to precision issues and incorrect result. (:issue:`60506`) -- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) -- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) -- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) - -Timedelta -^^^^^^^^^ -- Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`) -- Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) - -Timezones -^^^^^^^^^ -- -- - -Numeric -^^^^^^^ -- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) -- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) - -Conversion -^^^^^^^^^^ -- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) -- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) -- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) -- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - -Strings -^^^^^^^ -- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`) -- - -Interval -^^^^^^^^ -- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`) -- Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`) -- - -Indexing -^^^^^^^^ -- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) -- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) -- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) -- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) - -Missing -^^^^^^^ -- Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`) -- - -MultiIndex -^^^^^^^^^^ -- :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) -- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) -- :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) -- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) -- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) -- - -I/O -^^^ -- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`) -- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) -- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) -- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) -- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) -- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) -- Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`) -- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) -- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`) -- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) -- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) -- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) -- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) -- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) -- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`) -- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`) -- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`) -- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`) -- Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`) -- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) -- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) -- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) -- Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) -- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) - -Period -^^^^^^ -- Fixed error message when passing invalid period alias to :meth:`PeriodIndex.to_timestamp` (:issue:`58974`) -- - -Plotting -^^^^^^^^ -- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) -- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) -- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) -- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) -- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) - -Groupby/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`.DataFrameGroupBy.__len__` and :meth:`.SeriesGroupBy.__len__` would raise when the grouping contained NA values and ``dropna=False`` (:issue:`58644`) -- Bug in :meth:`.DataFrameGroupBy.any` that returned True for groups where all Timedelta values are NaT. (:issue:`59712`) -- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) -- Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) -- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) -- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) -- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) -- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) -- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) -- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) -- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) -- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) -- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) -- Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) -- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) -- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) -- Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`) - -Reshaping -^^^^^^^^^ -- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) -- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) -- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) -- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) -- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) -- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) -- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`) -- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) -- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) -- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) - -Sparse -^^^^^^ -- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) -- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`) -- Bug in :meth:`DataFrame.sparse.to_dense` which ignored subclassing and always returned an instance of :class:`DataFrame` (:issue:`59913`) - -ExtensionArray -^^^^^^^^^^^^^^ -- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) -- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) -- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) -- Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) -- Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) - -Styler -^^^^^^ -- Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed. - -Other -^^^^^ -- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) -- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) -- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) -- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) -- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`) -- Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) -- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) -- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) -- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) -- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) -- Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`) -- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`) -- Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`) -- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) -- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) -- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) -- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`) -- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) -- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) -- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) -- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) -- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) -- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) -- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) -- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) -- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) -- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) -- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`) -- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) -- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) -- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) - -.. ***DO NOT USE THIS SECTION*** - -- -- - -.. --------------------------------------------------------------------------- -.. _whatsnew_300.contributors: - -Contributors -~~~~~~~~~~~~ From b28362f204b14401d9ae255cfbcdb9367ecace4a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 18 Feb 2025 10:17:45 +0100 Subject: [PATCH 3/4] fixup test --- pandas/tests/indexing/test_loc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ddcce4553ba37..2c62c56bbcfc5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2084,10 +2084,15 @@ def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string): df.loc[conv(dt1), "one"] = 100 df.loc[conv(dt2), "one"] = 200 + # the dtype constructed by Index([..]) does not yet follow the unit + # of the input on 2.3.x -> so checking this is datetime64, but then + # specifying the exact dtype in the expected result + assert df.index.dtype.kind == "M" expected = DataFrame( {"one": [100.0, 200.0]}, index=Index( - [conv(dt1), conv(dt2)], dtype=None if using_infer_string else object + [conv(dt1), conv(dt2)], + dtype=df.index.dtype if using_infer_string else object, ), columns=Index(["one"]), ) From b3c8e7b6d4650cbfae741a78bc62be3b967f60eb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 18 Feb 2025 11:08:30 +0100 Subject: [PATCH 4/4] second attempt to fixup test --- pandas/tests/indexing/test_loc.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2c62c56bbcfc5..bb22a6d403086 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2087,13 +2087,14 @@ def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string): # the dtype constructed by Index([..]) does not yet follow the unit # of the input on 2.3.x -> so checking this is datetime64, but then # specifying the exact dtype in the expected result - assert df.index.dtype.kind == "M" + if using_infer_string: + assert df.index.dtype.kind == "M" + exp_dtype = df.index.dtype + else: + exp_dtype = "datetime64[ns]" expected = DataFrame( {"one": [100.0, 200.0]}, - index=Index( - [conv(dt1), conv(dt2)], - dtype=df.index.dtype if using_infer_string else object, - ), + index=Index([dt1, dt2], dtype=exp_dtype), columns=Index(["one"]), ) tm.assert_frame_equal(df, expected)