From 0e11d6dfde943dd3c355aba178eb732c6e6d6223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <6618166+twoertwein@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:32:03 -0500 Subject: [PATCH] TYP: Update pyright (#56892) * TYP: Update pyright * isort * int | float --- .pre-commit-config.yaml | 2 +- pandas/_config/config.py | 2 +- pandas/_config/localization.py | 7 ++-- pandas/_libs/lib.pyi | 28 ++++++++++++++-- pandas/_libs/lib.pyx | 13 ++++---- pandas/core/arrays/string_.py | 3 +- pandas/core/arrays/string_arrow.py | 5 +-- pandas/core/computation/eval.py | 7 ++-- pandas/core/dtypes/missing.py | 5 ++- pandas/core/frame.py | 4 ++- pandas/core/indexes/range.py | 6 ++-- pandas/core/ops/common.py | 6 ++-- pandas/core/reshape/pivot.py | 3 +- pandas/core/reshape/reshape.py | 50 +++++++++++++++++++++++----- pandas/core/strings/object_array.py | 10 +++--- pandas/core/tools/timedeltas.py | 4 ++- pandas/plotting/_matplotlib/hist.py | 8 +++-- pandas/util/_validators.py | 2 +- pyright_reportGeneralTypeIssues.json | 1 - 19 files changed, 121 insertions(+), 45 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d899a3136dd83..de59c59affb22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -132,7 +132,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.339 + - pyright@1.1.347 - id: pyright # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 7b8173409aec6..b74883dbe5734 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -88,7 +88,7 @@ class DeprecatedOption(NamedTuple): class RegisteredOption(NamedTuple): key: str - defval: object + defval: Any doc: str validator: Callable[[object], Any] | None cb: Callable[[str], Any] | None diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 5c1a0ff139533..69a56d3911316 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -10,7 +10,10 @@ import platform import re import subprocess -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) from pandas._config.config import options @@ -152,7 +155,7 @@ def get_locales( out_locales = [] for x in split_raw_locales: try: - out_locales.append(str(x, encoding=options.display.encoding)) + out_locales.append(str(x, encoding=cast(str, options.display.encoding))) except UnicodeError: # 'locale -a' is used to populated 'raw_locales' and on # Redhat 7 Linux (and maybe others) prints locale names diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 32ecd264262d6..c8befabbf86de 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -69,16 +69,26 @@ def fast_multiget( mapping: dict, keys: np.ndarray, # object[:] default=..., -) -> np.ndarray: ... +) -> ArrayLike: ... def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ... def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ... +@overload def map_infer( arr: np.ndarray, f: Callable[[Any], Any], - convert: bool = ..., + *, + convert: Literal[False], ignore_na: bool = ..., ) -> np.ndarray: ... @overload +def map_infer( + arr: np.ndarray, + f: Callable[[Any], Any], + *, + convert: bool = ..., + ignore_na: bool = ..., +) -> ArrayLike: ... +@overload def maybe_convert_objects( objects: npt.NDArray[np.object_], *, @@ -164,14 +174,26 @@ def is_all_arraylike(obj: list) -> bool: ... # Functions which in reality take memoryviews def memory_usage_of_objects(arr: np.ndarray) -> int: ... # object[:] # np.int64 +@overload def map_infer_mask( arr: np.ndarray, f: Callable[[Any], Any], mask: np.ndarray, # const uint8_t[:] - convert: bool = ..., + *, + convert: Literal[False], na_value: Any = ..., dtype: np.dtype = ..., ) -> np.ndarray: ... +@overload +def map_infer_mask( + arr: np.ndarray, + f: Callable[[Any], Any], + mask: np.ndarray, # const uint8_t[:] + *, + convert: bool = ..., + na_value: Any = ..., + dtype: np.dtype = ..., +) -> ArrayLike: ... def indices_fast( index: npt.NDArray[np.intp], labels: np.ndarray, # const int64_t[:] diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c483f35513a40..5eb20960f0e3d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2864,10 +2864,11 @@ def map_infer_mask( ndarray[object] arr, object f, const uint8_t[:] mask, + *, bint convert=True, object na_value=no_default, cnp.dtype dtype=np.dtype(object) -) -> np.ndarray: +) -> "ArrayLike": """ Substitute for np.vectorize with pandas-friendly dtype inference. @@ -2887,7 +2888,7 @@ def map_infer_mask( Returns ------- - np.ndarray + np.ndarray or an ExtensionArray """ cdef Py_ssize_t n = len(arr) result = np.empty(n, dtype=dtype) @@ -2941,8 +2942,8 @@ def _map_infer_mask( @cython.boundscheck(False) @cython.wraparound(False) def map_infer( - ndarray arr, object f, bint convert=True, bint ignore_na=False -) -> np.ndarray: + ndarray arr, object f, *, bint convert=True, bint ignore_na=False +) -> "ArrayLike": """ Substitute for np.vectorize with pandas-friendly dtype inference. @@ -2956,7 +2957,7 @@ def map_infer( Returns ------- - np.ndarray + np.ndarray or an ExtensionArray """ cdef: Py_ssize_t i, n @@ -3091,7 +3092,7 @@ def to_object_array_tuples(rows: object) -> np.ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def fast_multiget(dict mapping, object[:] keys, default=np.nan) -> np.ndarray: +def fast_multiget(dict mapping, object[:] keys, default=np.nan) -> "ArrayLike": cdef: Py_ssize_t i, n = len(keys) object val diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d4da5840689de..b73b49eca3e18 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -4,6 +4,7 @@ TYPE_CHECKING, ClassVar, Literal, + cast, ) import numpy as np @@ -637,7 +638,7 @@ def _str_map( # error: Argument 1 to "dtype" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected # "Type[object]" - dtype=np.dtype(dtype), # type: ignore[arg-type] + dtype=np.dtype(cast(type, dtype)), ) if not na_value_is_na: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 8c8787d15c8fe..a76eef8095695 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -7,6 +7,7 @@ TYPE_CHECKING, Callable, Union, + cast, ) import warnings @@ -327,7 +328,7 @@ def _str_map( # error: Argument 1 to "dtype" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected # "Type[object]" - dtype=np.dtype(dtype), # type: ignore[arg-type] + dtype=np.dtype(cast(type, dtype)), ) if not na_value_is_na: @@ -640,7 +641,7 @@ def _str_map( mask.view("uint8"), convert=False, na_value=na_value, - dtype=np.dtype(dtype), # type: ignore[arg-type] + dtype=np.dtype(cast(type, dtype)), ) return result diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 6313c2e2c98de..55421090d4202 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -4,7 +4,10 @@ from __future__ import annotations import tokenize -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) import warnings from pandas.util._exceptions import find_stack_level @@ -177,7 +180,7 @@ def eval( level: int = 0, target=None, inplace: bool = False, -): +) -> Any: """ Evaluate a Python expression as a string using various backends. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 3e4227a8a2598..52ec4a0b012e3 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -258,7 +258,9 @@ def _use_inf_as_na(key) -> None: globals()["INF_AS_NA"] = False -def _isna_array(values: ArrayLike, inf_as_na: bool = False): +def _isna_array( + values: ArrayLike, inf_as_na: bool = False +) -> npt.NDArray[np.bool_] | NDFrame: """ Return an array indicating which values of the input array are NaN / NA. @@ -275,6 +277,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False): Array of boolean values denoting the NA status of each element. """ dtype = values.dtype + result: npt.NDArray[np.bool_] | NDFrame if not isinstance(values, np.ndarray): # i.e. ExtensionArray diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ad6926406fe9b..e6d86cba0a4c3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9802,7 +9802,9 @@ def explode( return result.__finalize__(self, method="explode") - def unstack(self, level: IndexLabel = -1, fill_value=None, sort: bool = True): + def unstack( + self, level: IndexLabel = -1, fill_value=None, sort: bool = True + ) -> DataFrame | Series: """ Pivot a level of the (necessarily hierarchical) index labels. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index b7c37e6cf67ed..16b203931c073 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -491,7 +491,7 @@ def copy(self, name: Hashable | None = None, deep: bool = False) -> Self: new_index = self._rename(name=name) return new_index - def _minmax(self, meth: str): + def _minmax(self, meth: str) -> int | float: no_steps = len(self) - 1 if no_steps == -1: return np.nan @@ -500,13 +500,13 @@ def _minmax(self, meth: str): return self.start + self.step * no_steps - def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int | float: """The minimum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_min(args, kwargs) return self._minmax("min") - def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int | float: """The maximum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_max(args, kwargs) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 559977bacf881..fa085a1f0262b 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -40,7 +40,7 @@ def wrapper(method: F) -> F: return wrapper -def _unpack_zerodim_and_defer(method, name: str): +def _unpack_zerodim_and_defer(method: F, name: str) -> F: """ Boilerplate for pandas conventions in arithmetic and comparison methods. @@ -75,7 +75,9 @@ def new_method(self, other): return method(self, other) - return new_method + # error: Incompatible return value type (got "Callable[[Any, Any], Any]", + # expected "F") + return new_method # type: ignore[return-value] def get_op_result_name(left, right): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index ea74c17917279..ff973f6defc09 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -568,7 +568,8 @@ def pivot( # error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union # [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected # "Hashable" - result = indexed.unstack(columns_listlike) # type: ignore[arg-type] + # unstack with a MultiIndex returns a DataFrame + result = cast("DataFrame", indexed.unstack(columns_listlike)) # type: ignore[arg-type] result.index.names = [ name if name is not lib.no_default else None for name in result.index.names ] diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3493f1c78da91..39cd619715a91 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -4,6 +4,7 @@ from typing import ( TYPE_CHECKING, cast, + overload, ) import warnings @@ -451,7 +452,11 @@ def _unstack_multiple( result = data while clocs: val = clocs.pop(0) - result = result.unstack(val, fill_value=fill_value, sort=sort) + # error: Incompatible types in assignment (expression has type + # "DataFrame | Series", variable has type "DataFrame") + result = result.unstack( # type: ignore[assignment] + val, fill_value=fill_value, sort=sort + ) clocs = [v if v < val else v - 1 for v in clocs] return result @@ -460,7 +465,9 @@ def _unstack_multiple( dummy_df = data.copy(deep=False) dummy_df.index = dummy_index - unstacked = dummy_df.unstack( + # error: Incompatible types in assignment (expression has type "DataFrame | + # Series", variable has type "DataFrame") + unstacked = dummy_df.unstack( # type: ignore[assignment] "__placeholder__", fill_value=fill_value, sort=sort ) if isinstance(unstacked, Series): @@ -486,7 +493,21 @@ def _unstack_multiple( return unstacked -def unstack(obj: Series | DataFrame, level, fill_value=None, sort: bool = True): +@overload +def unstack(obj: Series, level, fill_value=..., sort: bool = ...) -> DataFrame: + ... + + +@overload +def unstack( + obj: Series | DataFrame, level, fill_value=..., sort: bool = ... +) -> Series | DataFrame: + ... + + +def unstack( + obj: Series | DataFrame, level, fill_value=None, sort: bool = True +) -> Series | DataFrame: if isinstance(level, (tuple, list)): if len(level) != 1: # _unstack_multiple only handles MultiIndexes, @@ -573,10 +594,14 @@ def _unstack_extension_series( # equiv: result.droplevel(level=0, axis=1) # but this avoids an extra copy result.columns = result.columns._drop_level_numbers([0]) - return result + # error: Incompatible return value type (got "DataFrame | Series", expected + # "DataFrame") + return result # type: ignore[return-value] -def stack(frame: DataFrame, level=-1, dropna: bool = True, sort: bool = True): +def stack( + frame: DataFrame, level=-1, dropna: bool = True, sort: bool = True +) -> Series | DataFrame: """ Convert DataFrame to Series with multi-level Index. Columns become the second level of the resulting hierarchical index @@ -659,7 +684,9 @@ def stack_multiple(frame: DataFrame, level, dropna: bool = True, sort: bool = Tr if all(lev in frame.columns.names for lev in level): result = frame for lev in level: - result = stack(result, lev, dropna=dropna, sort=sort) + # error: Incompatible types in assignment (expression has type + # "Series | DataFrame", variable has type "DataFrame") + result = stack(result, lev, dropna=dropna, sort=sort) # type: ignore[assignment] # Otherwise, level numbers may change as each successive level is stacked elif all(isinstance(lev, int) for lev in level): @@ -672,7 +699,9 @@ def stack_multiple(frame: DataFrame, level, dropna: bool = True, sort: bool = Tr while level: lev = level.pop(0) - result = stack(result, lev, dropna=dropna, sort=sort) + # error: Incompatible types in assignment (expression has type + # "Series | DataFrame", variable has type "DataFrame") + result = stack(result, lev, dropna=dropna, sort=sort) # type: ignore[assignment] # Decrement all level numbers greater than current, as these # have now shifted down by one level = [v if v <= lev else v - 1 for v in level] @@ -894,6 +923,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: if len(level) > 1: # Arrange columns in the order we want to take them, e.g. level=[2, 0, 1] sorter = np.argsort(level) + assert isinstance(stack_cols, MultiIndex) ordered_stack_cols = stack_cols._reorder_ilevels(sorter) else: ordered_stack_cols = stack_cols @@ -956,13 +986,15 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: codes, uniques = factorize(frame.index, use_na_sentinel=False) index_levels = [uniques] index_codes = list(np.tile(codes, (1, ratio))) - if isinstance(stack_cols, MultiIndex): + if isinstance(ordered_stack_cols, MultiIndex): column_levels = ordered_stack_cols.levels column_codes = ordered_stack_cols.drop_duplicates().codes else: column_levels = [ordered_stack_cols.unique()] column_codes = [factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0]] - column_codes = [np.repeat(codes, len(frame)) for codes in column_codes] + # error: Incompatible types in assignment (expression has type "list[ndarray[Any, + # dtype[Any]]]", variable has type "FrozenList") + column_codes = [np.repeat(codes, len(frame)) for codes in column_codes] # type: ignore[assignment] result.index = MultiIndex( levels=index_levels + column_levels, codes=index_codes + column_codes, diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 29d17e7174ee9..bdcf55e61d2d1 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -29,8 +29,6 @@ Scalar, ) - from pandas import Series - class ObjectStringArrayMixin(BaseStringArrayMethods): """ @@ -75,7 +73,9 @@ def _str_map( mask = isna(arr) map_convert = convert and not np.all(mask) try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert) + result = lib.map_infer_mask( + arr, f, mask.view(np.uint8), convert=map_convert + ) except (TypeError, AttributeError) as err: # Reraise the exception if callable `f` got wrong number of args. # The user may want to be warned by this, instead of getting NaN @@ -456,7 +456,7 @@ def _str_lstrip(self, to_strip=None): def _str_rstrip(self, to_strip=None): return self._str_map(lambda x: x.rstrip(to_strip)) - def _str_removeprefix(self, prefix: str) -> Series: + def _str_removeprefix(self, prefix: str): # outstanding question on whether to use native methods for users on Python 3.9+ # https://github.com/pandas-dev/pandas/pull/39226#issuecomment-836719770, # in which case we could do return self._str_map(str.removeprefix) @@ -468,7 +468,7 @@ def removeprefix(text: str) -> str: return self._str_map(removeprefix) - def _str_removesuffix(self, suffix: str) -> Series: + def _str_removesuffix(self, suffix: str): return self._str_map(lambda x: x.removesuffix(suffix)) def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index b80ed9ac50dce..fcf4f7606a594 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, + Any, overload, ) import warnings @@ -89,7 +90,8 @@ def to_timedelta( | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", -) -> Timedelta | TimedeltaIndex | Series | NaTType: + # returning Any for errors="ignore" +) -> Timedelta | TimedeltaIndex | Series | NaTType | Any: """ Convert argument to timedelta. diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index ab2dc20ccbd02..d521d91ab3b7b 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -41,7 +41,9 @@ if TYPE_CHECKING: from matplotlib.axes import Axes + from matplotlib.container import BarContainer from matplotlib.figure import Figure + from matplotlib.patches import Polygon from pandas._typing import PlottingOrientation @@ -112,7 +114,8 @@ def _plot( # type: ignore[override] *, bins, **kwds, - ): + # might return a subset from the possible return types of Axes.hist(...)[2]? + ) -> BarContainer | Polygon | list[BarContainer | Polygon]: if column_num == 0: cls._initialize_stacker(ax, stacking_id, len(bins) - 1) @@ -171,7 +174,8 @@ def _make_plot(self, fig: Figure) -> None: if self.by is not None: ax.set_title(pprint_thing(label)) - self._append_legend_handles_labels(artists[0], label) + # error: Value of type "Polygon" is not indexable + self._append_legend_handles_labels(artists[0], label) # type: ignore[index,arg-type] def _make_plot_keywords(self, kwds: dict[str, Any], y: np.ndarray) -> None: """merge BoxPlot/KdePlot properties to passed kwds""" diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 4e542d1b7f04a..178284c7d75b6 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -265,7 +265,7 @@ def validate_bool_kwarg( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." ) - return value # pyright: ignore[reportGeneralTypeIssues] + return value def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json index da27906e041cf..1589988603506 100644 --- a/pyright_reportGeneralTypeIssues.json +++ b/pyright_reportGeneralTypeIssues.json @@ -41,7 +41,6 @@ "pandas/core/arrays/string_arrow.py", "pandas/core/arrays/timedeltas.py", "pandas/core/computation/align.py", - "pandas/core/computation/ops.py", "pandas/core/construction.py", "pandas/core/dtypes/cast.py", "pandas/core/dtypes/common.py",