Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Add fillna at the beginning of _where not to fill NA. #60729 #60772

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
bc9a942
BUG: Add fillna so that cond doesnt contain NA at the beginning of _w…
sanggon6107 Jan 23, 2025
558569f
TST: Add tests for mask with NA. (#60729)
sanggon6107 Jan 23, 2025
bbbc720
BUG: Fix _where to make np.ndarray mutable. (#60729)
sanggon6107 Jan 23, 2025
e2f32cb
DOC: Add documentation regarding the bug (#60729)
sanggon6107 Jan 23, 2025
6fd8986
Merge branch 'main' into add-mask-fillna
sanggon6107 Jan 25, 2025
d2d5f62
ENH: Optimize test_mask_na()
sanggon6107 Jan 25, 2025
475f2d1
BUG: Fix a bug in test_mask_na() (#60729)
sanggon6107 Jan 25, 2025
db30b58
Update doc/source/whatsnew/v3.0.0.rst
sanggon6107 Feb 9, 2025
55fe420
Merge branch 'main' into add-mask-fillna
sanggon6107 Mar 3, 2025
cb94cf7
Add test arguments for test_mask_na
sanggon6107 Mar 3, 2025
71e442e
Fix whatsnew
sanggon6107 Mar 3, 2025
b6bd3af
Fix test failures by adding importorskip
sanggon6107 Mar 3, 2025
8bac997
Fill True when tuple or list cond has np.nan/pd.NA
sanggon6107 Mar 3, 2025
89bc1b4
Merge branch 'main' into add-mask-fillna
sanggon6107 Mar 4, 2025
f154cf5
Optimize _where
sanggon6107 Mar 4, 2025
eed6121
Optimize test_mask_na
sanggon6107 Mar 4, 2025
9ac81f0
Add np.array for read-only ndarray
sanggon6107 Mar 5, 2025
7e3fd3a
Merge branch 'main' into add-mask-fillna
sanggon6107 Mar 5, 2025
8c5ffff
Update generic.py
sanggon6107 Mar 5, 2025
5516517
Revert generic.py
sanggon6107 Mar 6, 2025
2437ce2
Merge branch 'main' into add-mask-fillna
sanggon6107 Mar 7, 2025
9556aa4
Replace np.array with fillna
sanggon6107 Mar 7, 2025
b64b8a7
Correct the unintended deletion
sanggon6107 Mar 7, 2025
9574746
Merge branch 'main' into add-mask-fillna
sanggon6107 Mar 20, 2025
c073c0b
Add test for list and ndarray
sanggon6107 Mar 24, 2025
4eea08e
Handle list with NA
sanggon6107 Mar 27, 2025
bbc5612
Fix code checks
sanggon6107 Mar 27, 2025
0851593
Fix type
sanggon6107 Mar 27, 2025
98fb602
Optimize operation
sanggon6107 Mar 28, 2025
915b8a7
Prevent a list with np.nan converting to float
sanggon6107 Apr 6, 2025
7611f59
Add test for a list with np.nan
sanggon6107 Apr 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
- Bug in :meth:`Series.mask` unexpectedly replacing ``pd.NA`` entries of the Series when the condition is missing at those positions (:issue:`60729`)
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)

Expand Down
19 changes: 17 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from pandas._libs import lib
from pandas._libs.lib import is_range_indexer
from pandas._libs.missing import NA
from pandas._libs.tslibs import (
Period,
Timestamp,
Expand Down Expand Up @@ -9701,6 +9702,7 @@ def _where(
# align the cond to same shape as myself
cond = common.apply_if_callable(cond, self)
if isinstance(cond, NDFrame):
cond = cond.fillna(True)
# CoW: Make sure reference is not kept alive
if cond.ndim == 1 and self.ndim == 2:
cond = cond._constructor_expanddim(
Expand All @@ -9711,7 +9713,10 @@ def _where(
cond = cond.align(self, join="right")[0]
else:
if not hasattr(cond, "shape"):
cond = np.asanyarray(cond)
cond = np.asanyarray(cond, dtype=object)
if not cond.flags.writeable:
cond.setflags(write=True)
cond[isna(cond)] = True
if cond.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)
Expand Down Expand Up @@ -10095,7 +10100,17 @@ def mask(

# see gh-21891
if not hasattr(cond, "__invert__"):
cond = np.array(cond)
cond = np.array(cond, dtype=object)

if isinstance(cond, np.ndarray):
if all(
x is NA or isinstance(x, (np.bool_, bool)) or x is np.nan
for x in cond.flatten()
):
if not cond.flags.writeable:
cond.setflags(write=True)
cond[isna(cond)] = False
cond = cond.astype(bool)

return self._where(
~cond,
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/series/indexing/test_mask.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import Series
import pandas._testing as tm

Expand Down Expand Up @@ -67,3 +69,26 @@ def test_mask_inplace():
rs = s.copy()
rs.mask(cond, -s, inplace=True)
tm.assert_series_equal(rs, s.mask(cond, -s))


@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_mask_na(dtype):
    """Check that ``Series.mask`` leaves missing entries alone (GH#60729).

    The condition is derived from the Series itself (``<= 2``) and is passed
    to ``mask`` three ways: as a Series, as a plain list and as a NumPy
    array. In every case only the entries 1 and 2 are expected to become
    -99; the entries created from ``None`` must remain missing.
    """
    values = Series([None, 1, 2, None, 3, 4, None], dtype=dtype)
    condition = values <= 2
    want = Series([None, -99, -99, None, 3, 4, None], dtype=dtype)

    # Each converter is applied only when its turn comes, so the list and
    # array forms are built after the preceding form has been verified.
    cond_forms = (
        lambda c: c,
        lambda c: c.to_list(),
        lambda c: c.to_numpy(),
    )
    for as_form in cond_forms:
        got = values.mask(as_form(condition), -99)
        tm.assert_series_equal(got, want)
12 changes: 12 additions & 0 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs.missing import NA

from pandas.core.dtypes.common import is_integer

import pandas as pd
Expand Down Expand Up @@ -443,3 +445,13 @@ def test_where_datetimelike_categorical(tz_naive_fixture):
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))

tm.assert_frame_equal(res, pd.DataFrame(dr))


def test_where_list_with_nan():
ser = Series([None, 1, 2, np.nan, 3, 4, NA])
cond = [np.nan, False, False, np.nan, True, True, np.nan]
expected = Series([None, -99, -99, np.nan, 3, 4, NA])

res = ser.where(cond, -99)

tm.assert_series_equal(res, expected)
Loading