Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug cov nat #60898

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
is_array_like,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
is_dict_like,
is_float,
is_float_dtype,
Expand All @@ -103,6 +104,7 @@
is_list_like,
is_scalar,
is_sequence,
is_timedelta64_dtype,
needs_i8_conversion,
pandas_dtype,
)
Expand Down Expand Up @@ -11350,6 +11352,13 @@ def cov(
c -0.150812 0.191417 0.895202
"""
data = self._get_numeric_data() if numeric_only else self
dtypes = [blk.dtype for blk in self._mgr.blocks]
if any(is_datetime64_any_dtype(d) or is_timedelta64_dtype(d) for d in dtypes):
msg = (
"DataFrame contains columns with dtype datetime64 "
"or timedelta64, which are not supported for cov."
)
raise TypeError(msg)
cols = data.columns
idx = cols.copy()
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1800,6 +1800,8 @@ def as_array(
arr = np.asarray(blk.values, dtype=dtype)
else:
arr = np.array(blk.values, dtype=dtype, copy=copy)
if passed_nan and blk.dtype.kind in ["m", "M"]:
arr[isna(blk.values)] = na_value

if not copy:
arr = arr.view()
Expand Down Expand Up @@ -1865,6 +1867,8 @@ def _interleave(
else:
arr = blk.get_values(dtype)
result[rl.indexer] = arr
if na_value is not lib.no_default and blk.dtype.kind in ["m", "M"]:
result[rl.indexer][isna(arr)] = na_value
itemmask[rl.indexer] = 1

if not itemmask.all():
Expand Down
36 changes: 36 additions & 0 deletions pandas/tests/frame/methods/test_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

from pandas import (
DataFrame,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm

Expand Down Expand Up @@ -41,3 +43,37 @@ def test_to_numpy_mixed_dtype_to_str(self):
result = df.to_numpy(dtype=str)
expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str)
tm.assert_numpy_array_equal(result, expected)

def test_to_numpy_datetime_with_na(self):
    """Check that ``to_numpy(float, na_value=np.nan)`` turns ``NaT`` into ``nan``.

    Two frames are exercised: one holding a single datetime column, and one
    mixing two datetime columns with a numeric column.
    """
    # GH#53115
    dti = date_range("2016-01-01", periods=3)
    df = DataFrame(dti)
    df.iloc[0, 0] = NaT

    result = df.to_numpy(float, na_value=np.nan)
    expected = np.array([[np.nan], [1.45169280e18], [1.45177920e18]])
    # assert_numpy_array_equal verifies shape and dtype in addition to the
    # values (treating NaN == NaN); np.allclose would broadcast and accept
    # a result of the wrong shape or dtype.
    tm.assert_numpy_array_equal(result, expected)

    df = DataFrame(
        {
            "a": [Timestamp("1970-01-01"), Timestamp("1970-01-02"), NaT],
            "b": [
                Timestamp("1970-01-01"),
                np.nan,
                Timestamp("1970-01-02"),
            ],
            "c": [
                1,
                np.nan,
                2,
            ],
        }
    )

    result = df.to_numpy(float, na_value=np.nan)
    # NOTE(review): 8.64e04 is one day in seconds, so these expected values
    # assume the Timestamp columns are stored at second resolution -- confirm.
    expected = np.array(
        [
            [0.00e00, 0.00e00, 1.00e00],
            [8.64e04, np.nan, np.nan],
            [np.nan, 8.64e04, 2.00e00],
        ]
    )
    tm.assert_numpy_array_equal(result, expected)
33 changes: 33 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,39 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype):
expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1]))
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "data",
    [
        # Every case pairs the same integer column "a" with a datetime-like
        # or timedelta-like column "b", with and without missing values.
        {"a": [0, 1, 2], "b": b_values}
        for b_values in (
            [pd.NaT, pd.NaT, pd.NaT],
            [Timestamp("1990-01-01"), pd.NaT, pd.NaT],
            [
                Timestamp("1990-01-01"),
                Timestamp("1991-01-01"),
                Timestamp("1992-01-01"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.NaT],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
            ],
        )
    ],
)
def test_df_cov_pd_nat(self, data):
    """Check that ``DataFrame.cov`` raises ``TypeError`` for each ``data`` case."""
    # GH #53115
    frame = DataFrame(data)
    expected_msg = "not supported for cov"
    with pytest.raises(TypeError, match=expected_msg):
        frame.cov()


def test_sum_timedelta64_skipna_false():
# GH#17235
Expand Down
Loading