From bda0025144d5843ee6b31a3ad0729736517fb116 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Sun, 9 Feb 2025 10:26:46 -0500 Subject: [PATCH 1/6] ENH: Add TypeError for unsupported datetime64 and timedelta64 dtypes in DataFrame.cov --- pandas/core/frame.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 57a7b9467a05e..882d82be26e2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11239,6 +11239,12 @@ def cov( c -0.150812 0.191417 0.895202 """ data = self._get_numeric_data() if numeric_only else self + if data.select_dtypes(include=[np.datetime64, np.timedelta64]).shape[1] > 0: + msg = ( + "DataFrame contains columns with dtype datetime64[ns] " + "or timedelta64[ns], which are not supported for cov." + ) + raise TypeError(msg) cols = data.columns idx = cols.copy() mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) From de954f7b55c5cdc50b048f8e814b05b3615d82c5 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Sun, 9 Feb 2025 10:27:08 -0500 Subject: [PATCH 2/6] TST: Add test for TypeError in DataFrame.cov with NaT and Timedelta inputs --- pandas/tests/frame/test_reductions.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 64e686d25faa7..4b18831ae708f 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1907,6 +1907,39 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype): expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data", + [ + {"a": [0, 1, 2], "b": [pd.NaT, pd.NaT, pd.NaT]}, + {"a": [0, 1, 2], "b": [Timestamp("1990-01-01"), pd.NaT, pd.NaT]}, + { + "a": [0, 1, 2], + "b": [ + Timestamp("1990-01-01"), + Timestamp("1991-01-01"), + Timestamp("1992-01-01"), + ], + }, + { + "a": [0, 1, 2], + "b": [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.NaT], + }, + { + "a": [0, 1, 2], + "b": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + }, + ], + ) + def test_df_cov_pd_nat(self, data): + # GH #53115 + df = DataFrame(data) + with pytest.raises(TypeError, match="not supported for cov"): + df.cov() + def test_sum_timedelta64_skipna_false(): # GH#17235 From cff4f369af762b5adf1dca2a08d6f96f6c7dcbec Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 25 Mar 2025 20:38:23 -0400 Subject: [PATCH 3/6] BUG: Improve error message for unsupported datetime and timedelta dtypes in cov() --- pandas/core/frame.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1eaf9a8d7a6a..1d7e64456fe3d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -105,6 +105,8 @@ is_sequence, needs_i8_conversion, pandas_dtype, + is_timedelta64_dtype, + is_datetime64_any_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ( @@ -11267,10 +11269,11 @@ def cov( c -0.150812 0.191417 0.895202 """ data = self._get_numeric_data() if numeric_only else self - if data.select_dtypes(include=[np.datetime64, np.timedelta64]).shape[1] > 0: + dtypes = [blk.dtype for blk in self._mgr.blocks] + if any(is_datetime64_any_dtype(d) or is_timedelta64_dtype(d) for d in dtypes): msg = ( - "DataFrame contains columns with dtype datetime64[ns] " - "or timedelta64[ns], which are not supported for cov." + "DataFrame contains columns with dtype datetime64 " + "or timedelta64, which are not supported for cov." ) raise TypeError(msg) cols = data.columns From df59a2ca7d3e2f0dbc3c2ef0fdf0dc7c86786bbf Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 25 Mar 2025 20:38:43 -0400 Subject: [PATCH 4/6] BUG: Handle NaN values for datetime and timedelta dtypes in BlockManager --- pandas/core/internals/managers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a3738bb25f56c..46a78dc128dc6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1800,6 +1800,8 @@ def as_array( arr = np.asarray(blk.values, dtype=dtype) else: arr = np.array(blk.values, dtype=dtype, copy=copy) + if passed_nan and blk.dtype.kind in ["m", "M"]: + arr[isna(blk.values)] = na_value if not copy: arr = arr.view() @@ -1865,6 +1867,8 @@ def _interleave( else: arr = blk.get_values(dtype) result[rl.indexer] = arr + if na_value is not lib.no_default and blk.dtype.kind in ["m", "M"]: + result[rl.indexer][isna(arr)] = na_value itemmask[rl.indexer] = 1 if not itemmask.all(): From dbceb26be9116fa6679d19a8b9c042153ce6e6c0 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 25 Mar 2025 20:38:49 -0400 Subject: [PATCH 5/6] BUG: Add test for to_numpy() handling of NaT and NaN values --- pandas/tests/frame/methods/test_to_numpy.py | 39 ++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index 36088cceb13f1..6db76c98a4a99 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas import ( - DataFrame, - Timestamp, -) +from pandas import DataFrame, Timestamp, date_range, NaT import pandas._testing as tm @@ -41,3 +38,37 @@ def test_to_numpy_mixed_dtype_to_str(self): result = df.to_numpy(dtype=str) expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str) tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_datetime_with_na(self): + # GH #53115 + dti = date_range("2016-01-01", periods=3) + df = DataFrame(dti) + df.iloc[0, 0] = NaT + expected = np.array([[np.nan], [1.45169280e18], [1.45177920e18]]) + assert np.allclose( + df.to_numpy(float, na_value=np.nan), expected, equal_nan=True + ) + + df = DataFrame( + { + "a": [Timestamp("1970-01-01"), Timestamp("1970-01-02"), NaT], + "b": [ + Timestamp("1970-01-01"), + np.nan, + Timestamp("1970-01-02"), + ], + "c": [ + 1, + np.nan, + 2, + ], + } + ) + arr = np.array( + [ + [0.00e00, 0.00e00, 1.00e00], + [8.64e04, np.nan, np.nan], + [np.nan, 8.64e04, 2.00e00], + ] + ) + assert np.allclose(df.to_numpy(float, na_value=np.nan), arr, equal_nan=True) From cf53cc0f74a9aee0bdde13841706c02938ab72d1 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 26 Mar 2025 08:50:33 -0400 Subject: [PATCH 6/6] REF: Refactor imports in frame.py and update test imports in test_to_numpy.py --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/methods/test_to_numpy.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4797fcfa66025..153516b161933 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,6 +93,7 @@ is_array_like, is_bool_dtype, is_dataclass, + is_datetime64_any_dtype, is_dict_like, is_float, is_float_dtype, @@ -103,10 +104,9 @@ is_list_like, is_scalar, is_sequence, + is_timedelta64_dtype, needs_i8_conversion, pandas_dtype, - is_timedelta64_dtype, - is_datetime64_any_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ( diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index 6db76c98a4a99..3c322352a956a 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, Timestamp, date_range, NaT +from pandas import ( + DataFrame, + NaT, + Timestamp, + date_range, +) import pandas._testing as tm