diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 838ac6b34..1d48feba3 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -235,10 +235,11 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: ( DatetimeIndex - | Sequence[np.datetime64 | datetime] - | dict[HashableT1, np.datetime64 | datetime] + | Sequence[np.datetime64 | datetime | date] + | dict[HashableT1, np.datetime64 | datetime | date] | np.datetime64 | datetime + | date ), index: Axes | None = ..., *, @@ -259,7 +260,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( # type: ignore[overload-overlap] cls, - data: PeriodIndex, + data: PeriodIndex | Sequence[Period], index: Axes | None = ..., *, dtype: PeriodDtype = ..., @@ -748,7 +749,18 @@ class Series(IndexOpsMixin[S1], NDFrame): def cov( self, other: Series[S1], min_periods: int | None = ..., ddof: int = ... ) -> float: ... - def diff(self, periods: int = ...) -> Series[S1]: ... + @overload + def diff(self: Series[_bool], periods: int = ...) -> Series[type[object]]: ... # type: ignore[overload-overlap] + @overload + def diff(self: Series[complex], periods: int = ...) -> Series[complex]: ... # type: ignore[overload-overlap] + @overload + def diff(self: Series[bytes], periods: int = ...) -> Never: ... + @overload + def diff(self: Series[type], periods: int = ...) -> Never: ... + @overload + def diff(self: Series[str], periods: int = ...) -> Never: ... + @overload + def diff(self, periods: int = ...) -> Series[float]: ... def autocorr(self, lag: int = ...) -> float: ... @overload def dot(self, other: Series[S1]) -> Scalar: ... @@ -1511,16 +1523,16 @@ class Series(IndexOpsMixin[S1], NDFrame): def __eq__(self, other: object) -> Series[_bool]: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] def __floordiv__(self, other: num | _ListLike | Series[S1]) -> Series[int]: ... 
def __ge__( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - self, other: S1 | _ListLike | Series[S1] | datetime | timedelta + self, other: S1 | _ListLike | Series[S1] | datetime | timedelta | date ) -> Series[_bool]: ... def __gt__( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - self, other: S1 | _ListLike | Series[S1] | datetime | timedelta + self, other: S1 | _ListLike | Series[S1] | datetime | timedelta | date ) -> Series[_bool]: ... def __le__( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - self, other: S1 | _ListLike | Series[S1] | datetime | timedelta + self, other: S1 | _ListLike | Series[S1] | datetime | timedelta | date ) -> Series[_bool]: ... def __lt__( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - self, other: S1 | _ListLike | Series[S1] | datetime | timedelta + self, other: S1 | _ListLike | Series[S1] | datetime | timedelta | date ) -> Series[_bool]: ... @overload def __mul__( @@ -2075,6 +2087,7 @@ class TimestampSeries(Series[Timestamp]): numeric_only: _bool = ..., **kwargs, ) -> Timedelta: ... + def diff(self, periods: int = ...) -> TimedeltaSeries: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] class TimedeltaSeries(Series[Timedelta]): # ignores needed because of mypy @@ -2171,11 +2184,13 @@ class TimedeltaSeries(Series[Timedelta]): numeric_only: _bool = ..., **kwargs, ) -> Timedelta: ... + def diff(self, periods: int = ...) -> TimedeltaSeries: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] class PeriodSeries(Series[Period]): @property def dt(self) -> PeriodProperties: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] def __sub__(self, other: PeriodSeries) -> OffsetSeries: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + def diff(self, periods: int = ...) -> OffsetSeries: ... 
# type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] class OffsetSeries(Series[BaseOffset]): @overload # type: ignore[override] @@ -2188,3 +2203,4 @@ class OffsetSeries(Series[BaseOffset]): class IntervalSeries(Series[Interval[_OrderableT]], Generic[_OrderableT]): @property def array(self) -> IntervalArray: ... + def diff(self, periods: int = ...) -> Never: ... diff --git a/pyproject.toml b/pyproject.toml index f21594481..62c63b039 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ black = ">=23.3.0" isort = ">=5.12.0" openpyxl = ">=3.0.10" # for tables, MacOS gives random CI failures on 3.9.2 -tables = { version = "==3.9.1", python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4 +tables = { version = "==3.9.2", python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4 lxml = ">=4.9.1" pyreadstat = ">=1.2.0" xlrd = ">=2.0.1" diff --git a/tests/__init__.py b/tests/__init__.py index ab95a55b3..4448af3c5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -10,6 +10,7 @@ from typing import ( TYPE_CHECKING, Final, + Literal, ) import pandas as pd @@ -24,16 +25,22 @@ PD_LTE_22 = Version(pd.__version__) < Version("2.2.999") -def check(actual: T, klass: type, dtype: type | None = None, attr: str = "left") -> T: +def check( + actual: T, + klass: type, + dtype: type | None = None, + attr: str = "left", + index_to_check_for_type: Literal[0, -1] = 0, +) -> T: if not isinstance(actual, klass): raise RuntimeError(f"Expected type '{klass}' but got '{type(actual)}'") if dtype is None: return actual # type: ignore[return-value] if isinstance(actual, pd.Series): - value = actual.iloc[0] + value = actual.iloc[index_to_check_for_type] elif isinstance(actual, pd.Index): - value = actual[0] # type: ignore[assignment] + value = actual[index_to_check_for_type] # type: ignore[assignment] elif isinstance(actual, BaseGroupBy): value = actual.obj elif hasattr(actual, "__iter__"): diff --git a/tests/test_series.py 
b/tests/test_series.py index bc5bd575e..203f81700 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -32,11 +32,13 @@ from typing_extensions import ( Self, TypeAlias, + assert_never, assert_type, ) import xarray as xr from pandas._libs.missing import NAType +from pandas._libs.tslibs import BaseOffset from pandas._typing import ( DtypeObj, Scalar, @@ -45,6 +47,7 @@ from tests import ( PD_LTE_22, TYPE_CHECKING_INVALID_USAGE, + WINDOWS, check, pytest_warns_bounded, ) @@ -52,12 +55,14 @@ if TYPE_CHECKING: from pandas.core.series import ( + OffsetSeries, TimedeltaSeries, TimestampSeries, ) else: TimedeltaSeries: TypeAlias = pd.Series TimestampSeries: TypeAlias = pd.Series + OffsetSeries: TypeAlias = pd.Series if TYPE_CHECKING: from pandas._typing import ( @@ -3091,3 +3096,111 @@ def test_series_apply() -> None: check(assert_type(s.apply(list), "pd.Series[Any]"), pd.Series) check(assert_type(s.apply(set), "pd.Series[Any]"), pd.Series) check(assert_type(s.apply(frozenset), "pd.Series[Any]"), pd.Series) + + +def test_diff() -> None: + s = pd.Series([1, 1, 2, 3, 5, 8]) + # int -> float + check(assert_type(s.diff(), "pd.Series[float]"), pd.Series, float) + # uint -> float + check(assert_type(s.astype(np.uint32).diff(), "pd.Series[float]"), pd.Series, float) + # float -> float + check(assert_type(s.astype(float).diff(), "pd.Series[float]"), pd.Series, float) + # datetime.date -> timedelta + check( + assert_type( + pd.Series( + [datetime.datetime.now().date(), datetime.datetime.now().date()] + ).diff(), + "TimedeltaSeries", + ), + pd.Series, + pd.Timedelta, + index_to_check_for_type=-1, + ) + # timestamp -> timedelta + times = pd.Series([pd.Timestamp(0), pd.Timestamp(1)]) + check( + assert_type(times.diff(), "TimedeltaSeries"), + pd.Series, + pd.Timedelta, + index_to_check_for_type=-1, + ) + # timedelta -> timedelta64 + check( + assert_type( + pd.Series([pd.Timedelta(0), pd.Timedelta(1)]).diff(), "TimedeltaSeries" + ), + pd.Series, + pd.Timedelta, + 
index_to_check_for_type=-1, + ) + # period -> object + if WINDOWS: + with pytest_warns_bounded( + RuntimeWarning, "overflow encountered in scalar multiply" + ): + check( + assert_type( + pd.Series( + pd.period_range(start="2017-01-01", end="2017-02-01", freq="D") + ).diff(), + "OffsetSeries", + ), + pd.Series, + BaseOffset, + index_to_check_for_type=-1, + ) + else: + check( + assert_type( + pd.Series( + pd.period_range(start="2017-01-01", end="2017-02-01", freq="D") + ).diff(), + "OffsetSeries", + ), + pd.Series, + BaseOffset, + index_to_check_for_type=-1, + ) + # bool -> object + check( + assert_type( + pd.Series([True, True, False, False, True]).diff(), + "pd.Series[type[object]]", + ), + pd.Series, + object, + ) + # object -> object + check( + assert_type(s.astype(object).diff(), "pd.Series[type[object]]"), + pd.Series, + object, + ) + # complex -> complex + check( + assert_type(s.astype(complex).diff(), "pd.Series[complex]"), pd.Series, complex + ) + if TYPE_CHECKING_INVALID_USAGE: + # interval -> TypeError: IntervalArray has no 'diff' method. Convert to a suitable dtype prior to calling 'diff'. + assert_never(pd.Series([pd.Interval(0, 2), pd.Interval(1, 4)]).diff()) + + +def test_diff_never1() -> None: + s = pd.Series([1, 1, 2, 3, 5, 8]) + if TYPE_CHECKING_INVALID_USAGE: + # bytes -> numpy.core._exceptions._UFuncNoLoopError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('S21'), dtype('S21')) -> None + assert_never(s.astype(bytes).diff()) + + +def test_diff_never2() -> None: + if TYPE_CHECKING_INVALID_USAGE: + # dtype -> TypeError: unsupported operand type(s) for -: 'type' and 'type' + assert_never(pd.Series([str, int, bool]).diff()) + + +def test_diff_never3() -> None: + if TYPE_CHECKING_INVALID_USAGE: + # str -> TypeError: unsupported operand type(s) for -: 'str' and 'str' + assert_never(pd.Series(["a", "b"]).diff())