TST (string) fix xfailed groupby tests (3) (pandas-dev#59642)
* TST (string) fix xfailed groupby tests (3)

* TST: non-pyarrow build
jbrockmendel authored Aug 28, 2024
1 parent ad077aa commit 8fa78ec
Showing 4 changed files with 24 additions and 35 deletions.
8 changes: 2 additions & 6 deletions pandas/tests/groupby/methods/test_describe.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -73,7 +71,6 @@ def test_series_describe_as_index(as_index, keys):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_frame_describe_multikey(tsframe):
     grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
     result = grouped.describe()
@@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe):
         group = grouped[col].describe()
         # GH 17464 - Remove duplicate MultiIndex levels
         group_col = MultiIndex(
-            levels=[[col], group.columns],
+            levels=[Index([col], dtype=tsframe.columns.dtype), group.columns],
             codes=[[0] * len(group.columns), range(len(group.columns))],
         )
         group = DataFrame(group.values, columns=group_col, index=group.index)
@@ -249,7 +246,6 @@ def test_describe_non_cython_paths():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("dtype", [int, float, object])
 @pytest.mark.parametrize(
     "kwargs",
@@ -271,5 +267,5 @@ def test_groupby_empty_dataset(dtype, kwargs):
 
     result = df.iloc[:0].groupby("A").B.describe(**kwargs)
     expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
-    expected.index = Index([])
+    expected.index = Index([], dtype=df.columns.dtype)
     tm.assert_frame_equal(result, expected)
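Aside (an explanatory sketch, not part of the commit): with pandas' future string dtype enabled, string column labels are no longer inferred as object dtype, so the expected Index/MultiIndex objects above are now built with an explicit dtype taken from the real data (tsframe.columns.dtype, df.columns.dtype) rather than relying on the old object default. Roughly, assuming a pandas build that exposes the future.infer_string option:

import pandas as pd

# Illustrative only: under the string-dtype option, a frame's columns Index is
# no longer object dtype, so expected (even empty) Index objects in the tests
# are constructed with that same dtype to satisfy assert_frame_equal.
pd.set_option("future.infer_string", True)

df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
print(df.columns.dtype)  # exact dtype depends on the pandas version / pyarrow backing
expected = pd.Index([], dtype=df.columns.dtype)
print(expected)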
6 changes: 2 additions & 4 deletions pandas/tests/groupby/methods/test_nth.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -679,14 +677,14 @@ def test_first_multi_key_groupby_categorical():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("method", ["first", "last", "nth"])
 def test_groupby_last_first_nth_with_none(method, nulls_fixture):
     # GH29645
-    expected = Series(["y"])
+    expected = Series(["y"], dtype=object)
     data = Series(
         [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
         index=[0, 0, 0, 0, 0],
+        dtype=object,
     ).groupby(level=0)
 
     if method == "nth":
16 changes: 0 additions & 16 deletions pandas/tests/groupby/test_groupby_dropna.py
@@ -3,7 +3,6 @@
 
 from pandas._config import using_string_dtype
 
-from pandas.compat import HAS_PYARROW
 from pandas.compat.pyarrow import pa_version_under10p1
 
 from pandas.core.dtypes.missing import na_value_for_dtype
@@ -13,9 +12,6 @@
 from pandas.tests.groupby import get_groupby_method_args
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [
@@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group(
     tm.assert_frame_equal(grouped, expected)
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [
@@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
     tm.assert_frame_equal(grouped, expected)
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
 @pytest.mark.parametrize(
     "dropna, idx, expected",
     [
@@ -215,9 +205,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [
@@ -299,9 +286,6 @@ def test_groupby_dropna_datetime_like_data(
     tm.assert_frame_equal(grouped, expected)
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
 @pytest.mark.parametrize(
     "dropna, data, selected_data, levels",
     [
29 changes: 20 additions & 9 deletions pandas/tests/groupby/transform/test_transform.py
@@ -6,6 +6,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas.compat import HAS_PYARROW
 
 from pandas.core.dtypes.common import ensure_platform_int
 
@@ -372,8 +373,7 @@ def test_transform_select_columns(df):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_transform_nuisance_raises(df):
+def test_transform_nuisance_raises(df, using_infer_string):
     # case that goes through _transform_item_by_item
 
     df.columns = ["A", "B", "B", "D"]
@@ -383,10 +383,16 @@ def test_transform_nuisance_raises(df):
     grouped = df.groupby("A")
 
     gbc = grouped["B"]
-    with pytest.raises(TypeError, match="Could not convert"):
+    msg = "Could not convert"
+    if using_infer_string:
+        if df.columns.dtype.storage == "pyarrow":
+            msg = "with dtype str does not support operation 'mean'"
+        else:
+            msg = "Cannot perform reduction 'mean' with string dtype"
+    with pytest.raises(TypeError, match=msg):
         gbc.transform(lambda x: np.mean(x))
 
-    with pytest.raises(TypeError, match="Could not convert"):
+    with pytest.raises(TypeError, match=msg):
         df.groupby("A").transform(lambda x: np.mean(x))
 
 
@@ -445,8 +451,7 @@ def test_transform_coercion():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_groupby_transform_with_int():
+def test_groupby_transform_with_int(using_infer_string):
     # GH 3740, make sure that we might upcast on item-by-item transform
 
     # floats
@@ -476,8 +481,14 @@ def test_groupby_transform_with_int():
             "D": "foo",
         }
     )
+    msg = "Could not convert"
+    if using_infer_string:
+        if HAS_PYARROW:
+            msg = "with dtype str does not support operation 'mean'"
+        else:
+            msg = "Cannot perform reduction 'mean' with string dtype"
     with np.errstate(all="ignore"):
-        with pytest.raises(TypeError, match="Could not convert"):
+        with pytest.raises(TypeError, match=msg):
             df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
     result = df.groupby("A")[["B", "C"]].transform(
         lambda x: (x - x.mean()) / x.std()
@@ -489,7 +500,7 @@ def test_groupby_transform_with_int():
     s = Series([2, 3, 4, 10, 5, -1])
     df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
     with np.errstate(all="ignore"):
-        with pytest.raises(TypeError, match="Could not convert"):
+        with pytest.raises(TypeError, match=msg):
            df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
     result = df.groupby("A")[["B", "C"]].transform(
         lambda x: (x - x.mean()) / x.std()
@@ -705,7 +716,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "op, args, targop",
@@ -757,6 +767,7 @@ def test_cython_transform_frame_column(
                 "does not support operation",
                 ".* is not supported for object dtype",
                 "is not implemented for this dtype",
+                ".* is not supported for str dtype",
             ]
         )
         with pytest.raises(TypeError, match=msg):
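Aside (an explanatory sketch, not part of the commit): the msg logic added above reflects that reducing a string column now raises a TypeError whose message depends on whether the pyarrow-backed or the fallback string dtype is in use. A minimal reproduction of what the updated tests assert, assuming a pandas build with the future.infer_string option; the frame and column names are made up for illustration:

import numpy as np
import pandas as pd

# Illustrative only: opt in to the future string dtype so "x"/"y"/"z" infer as
# the str dtype rather than object, mirroring using_infer_string in the tests.
pd.set_option("future.infer_string", True)

df = pd.DataFrame({"A": [1, 1, 2], "B": ["x", "y", "z"]})

try:
    # Calling np.mean on a string column inside groupby(...).transform raises;
    # pyarrow-backed builds report "... does not support operation 'mean'",
    # other builds report "Cannot perform reduction 'mean' with string dtype".
    df.groupby("A")["B"].transform(lambda x: np.mean(x))
except TypeError as err:
    print(err)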
