From 5b2bbbef62f84d5451763dfae82a9aa2ce0c6be4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 11 Oct 2020 11:27:25 -0700 Subject: [PATCH 1/4] TYP: mostly io, plotting --- pandas/core/base.py | 25 ++++++++++++++++++--- pandas/core/indexes/category.py | 1 + pandas/core/reshape/concat.py | 5 +++-- pandas/io/excel/_base.py | 6 ++--- pandas/io/formats/format.py | 12 +++++----- pandas/io/parsers.py | 2 +- pandas/io/pytables.py | 33 ++++++++++++++++++++-------- pandas/io/stata.py | 4 +++- pandas/plotting/_matplotlib/core.py | 6 +++-- pandas/plotting/_matplotlib/tools.py | 4 ++-- setup.cfg | 3 --- 11 files changed, 70 insertions(+), 31 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 10b83116dee58..6af537dcd149a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,12 +4,22 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, Optional, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + FrozenSet, + Optional, + TypeVar, + Union, + cast, +) import numpy as np import pandas._libs.lib as lib -from pandas._typing import IndexLabel +from pandas._typing import DtypeObj, IndexLabel from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -33,6 +43,9 @@ from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops +if TYPE_CHECKING: + from pandas import Categorical + _shared_docs: Dict[str, str] = dict() _indexops_doc_kwargs = dict( klass="IndexOpsMixin", @@ -238,7 +251,7 @@ def _gotitem(self, key, ndim: int, subset=None): Parameters ---------- key : str / list of selections - ndim : 1,2 + ndim : {1, 2} requested ndim of result subset : object, default None subset to act on @@ -305,6 +318,11 @@ class IndexOpsMixin(OpsMixin): ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ ) + @property + def dtype(self) -> DtypeObj: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + @property def _values(self) -> Union[ExtensionArray, np.ndarray]: # must be defined here as a property for mypy @@ -832,6 +850,7 @@ def _map_values(self, mapper, na_action=None): if is_categorical_dtype(self.dtype): # use the built in categorical series mapper which saves # time by mapping the categories instead of all values + self = cast("Categorical", self) return self._values.map(mapper) values = self._values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8038bc6bf1c72..a5e8edca80873 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -164,6 +164,7 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): codes: np.ndarray categories: Index _data: Categorical + _values: Categorical @property def _engine_type(self): diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 91310a3659f33..162f02afbaeec 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -3,7 +3,7 @@ """ from collections import abc -from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload +from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, cast, overload import numpy as np @@ -30,7 +30,7 @@ from pandas.core.internals import concatenate_block_managers if TYPE_CHECKING: - from pandas import DataFrame + from pandas import DataFrame, Series from pandas.core.generic import NDFrame # --------------------------------------------------------------------- @@ -463,6 +463,7 @@ def get_result(self): if self.bm_axis == 0: name = com.consensus_name_attr(self.objs) cons = self.objs[0]._constructor + cons = cast("Series", cons) arrs = [ser._values for ser in self.objs] diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 604b7e12ec243..e4c6ab3bec5e4 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -3,7 +3,7 @@ from io import BufferedIOBase, BytesIO, RawIOBase import os from textwrap import fill -from typing import Any, Mapping, Union +from typing import Any, Mapping, Tuple, Union from pandas._config import config @@ -674,7 +674,7 @@ def __new__(cls, path, engine=None, **kwargs): @property @abc.abstractmethod - def supported_extensions(self): + def supported_extensions(self) -> Tuple[str, ...]: """Extensions that writer engine supports.""" pass @@ -785,7 +785,7 @@ def _value_with_fmt(self, val): return val, fmt @classmethod - def check_extension(cls, ext): + def check_extension(cls, ext: str): """ checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3e4780ec21378..2977a78f02785 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1835,9 +1835,11 @@ def _make_fixed_width( return strings if adj is None: - adj = get_adjustment() + adjustment = get_adjustment() + else: + adjustment = adj - max_len = max(adj.len(x) for x in strings) + max_len = max(adjustment.len(x) for x in strings) if minimum is not None: max_len = max(minimum, max_len) @@ -1846,14 +1848,14 @@ def _make_fixed_width( if conf_max is not None and max_len > conf_max: max_len = conf_max - def just(x): + def just(x: str) -> str: if conf_max is not None: - if (conf_max > 3) & (adj.len(x) > max_len): + if (conf_max > 3) & (adjustment.len(x) > max_len): x = x[: max_len - 3] + "..." return x strings = [just(x) for x in strings] - result = adj.justify(strings, max_len, mode=justify) + result = adjustment.justify(strings, max_len, mode=justify) return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 63c3f9899d915..1184b0436b93d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1661,7 +1661,7 @@ def _get_name(icol): return index - def _agg_index(self, index, try_parse_dates=True): + def _agg_index(self, index, try_parse_dates=True) -> Index: arrays = [] for i, arr in enumerate(index): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2903ede1d5c0b..5160773455067 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -565,6 +565,7 @@ def __fspath__(self): def root(self): """ return the root node """ self._check_if_open() + assert self._handle is not None # for mypy return self._handle.root @property @@ -1393,6 +1394,8 @@ def groups(self): """ _tables() self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy return [ g for g in self._handle.walk_groups() @@ -1437,6 +1440,9 @@ def walk(self, where="/"): """ _tables() self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + for g in self._handle.walk_groups(where): if getattr(g._v_attrs, "pandas_type", None) is not None: continue @@ -1862,6 +1868,8 @@ def __init__( def __iter__(self): # iterate current = self.start + if self.coordinates is None: + raise ValueError("Cannot iterate until get_result is called.") while current < self.stop: stop = min(current + self.chunksize, self.stop) value = self.func(None, None, self.coordinates[current:stop]) @@ -3196,7 +3204,7 @@ class Table(Fixed): pandas_kind = "wide_table" format_type: str = "table" # GH#30962 needed by dask table_type: str - levels = 1 + levels: Union[int, List[Label]] = 1 is_table = True index_axes: List[IndexCol] @@ -3292,7 +3300,9 @@ def is_multi_index(self) -> bool: """the levels attribute is 1 or a list in the case of a multi-index""" return isinstance(self.levels, list) - def validate_multiindex(self, obj): + def validate_multiindex( + self, obj: FrameOrSeriesUnion + ) -> Tuple[DataFrame, List[Label]]: """ validate that we can store the multi-index; reset and return the new object @@ -3301,11 +3311,13 @@ def validate_multiindex(self, obj): l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names) ] try: - return obj.reset_index(), levels + reset_obj = obj.reset_index() except ValueError as err: raise ValueError( "duplicate names/columns in the multi-index when storing as a table" ) from err + assert isinstance(reset_obj, DataFrame) # for mypy + return reset_obj, levels @property def nrows_expected(self) -> int: @@ -3433,7 +3445,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, "nan_rep", None) self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) - self.levels = getattr(self.attrs, "levels", None) or [] + self.levels: List[Label] = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] @@ -4562,11 +4574,12 @@ class AppendableMultiSeriesTable(AppendableSeriesTable): def write(self, obj, **kwargs): """ we are going to write this as a frame table """ name = obj.name or "values" - obj, self.levels = self.validate_multiindex(obj) + newobj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy cols = list(self.levels) cols.append(name) - obj.columns = cols - return super().write(obj=obj, **kwargs) + newobj.columns = Index(cols) + return super().write(obj=newobj, **kwargs) class GenericTable(AppendableFrameTable): @@ -4576,6 +4589,7 @@ class GenericTable(AppendableFrameTable): table_type = "generic_table" ndim = 2 obj_type = DataFrame + levels: List[Label] @property def pandas_type(self) -> str: @@ -4609,7 +4623,7 @@ def indexables(self): name="index", axis=0, table=self.table, meta=meta, metadata=md ) - _indexables = [index_col] + _indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col] for i, n in enumerate(d._v_names): assert isinstance(n, str) @@ -4652,6 +4666,7 @@ def write(self, obj, data_columns=None, **kwargs): elif data_columns is True: data_columns = obj.columns.tolist() obj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy for n in self.levels: if n not in data_columns: data_columns.insert(0, n) @@ -5173,7 +5188,7 @@ def select_coords(self): start = 0 elif start < 0: start += nrows - if self.stop is None: + if stop is None: stop = nrows elif stop < 0: stop += nrows diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 55dde374048b6..00b6bd9b4fb05 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -887,7 +887,9 @@ def __init__(self): (65530, np.int8), ] ) - self.TYPE_MAP = list(range(251)) + list("bhlfd") + # error: Argument 1 to "list" has incompatible type "str"; + # expected "Iterable[int]" [arg-type] + self.TYPE_MAP = list(range(251)) + list("bhlfd") # type:ignore [arg-type] self.TYPE_MAP_XML = dict( [ # Not really a Q, unclear how to handle byteswap diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f806325d60eca..a69767df267fc 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -82,6 +82,8 @@ def _kind(self): _default_rot = 0 orientation: Optional[str] = None + axes: np.ndarray # of Axes objects + def __init__( self, data, @@ -177,7 +179,7 @@ def __init__( self.ax = ax self.fig = fig - self.axes = None + self.axes = np.array([], dtype=object) # "real" version get set in `generate` # parse errorbar input if given xerr = kwds.pop("xerr", None) @@ -697,7 +699,7 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) - def _get_ax(self, i): + def _get_ax(self, i: int): # get the twinx ax if appropriate if self.subplots: ax = self.axes[i] diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 832957dd73ec7..bec1f48f5e64a 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -401,11 +401,11 @@ def handle_shared_axes( _remove_labels_from_axis(ax.yaxis) -def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]: +def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> np.ndarray: if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, ABCIndexClass)): - return axes.ravel() + return np.asarray(axes).ravel() return np.array(axes) diff --git a/setup.cfg b/setup.cfg index 2ae00e0efeda4..f962b42f7baf3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -229,9 +229,6 @@ check_untyped_defs=False [mypy-pandas.io.parsers] check_untyped_defs=False -[mypy-pandas.io.pytables] -check_untyped_defs=False - [mypy-pandas.io.stata] check_untyped_defs=False From 5013cb160bdec66ff8e4d03078cac5c7a46b2167 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 11 Oct 2020 17:30:52 -0700 Subject: [PATCH 2/4] TYP: stata --- pandas/io/excel/_base.py | 10 ++++------ pandas/io/stata.py | 4 ++-- setup.cfg | 3 --- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index e4c6ab3bec5e4..0b6130a34666b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -672,11 +672,8 @@ def __new__(cls, path, engine=None, **kwargs): curr_sheet = None path = None - @property - @abc.abstractmethod - def supported_extensions(self) -> Tuple[str, ...]: - """Extensions that writer engine supports.""" - pass + supported_extensions: Tuple[str, ...] + # Exensions that writer engine supports; must be implemented by subclass @property @abc.abstractmethod @@ -792,7 +789,8 @@ def check_extension(cls, ext: str): """ if ext.startswith("."): ext = ext[1:] - if not any(ext in extension for extension in cls.supported_extensions): + exts = cls.supported_extensions + if not any(ext in extension for extension in exts): raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") else: return True diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 00b6bd9b4fb05..c9d29b66e8981 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -378,8 +378,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) - d["year"] = date_index.year - d["month"] = date_index.month + d["year"] = date_index._data.year + d["month"] = date_index._data.month if days: days_in_ns = dates.astype(np.int64) - to_datetime( d["year"], format="%Y" diff --git a/setup.cfg b/setup.cfg index f962b42f7baf3..88868d74f8686 100644 --- a/setup.cfg +++ b/setup.cfg @@ -229,9 +229,6 @@ check_untyped_defs=False [mypy-pandas.io.parsers] check_untyped_defs=False -[mypy-pandas.io.stata] -check_untyped_defs=False - [mypy-pandas.plotting._matplotlib.converter] check_untyped_defs=False From 363bb35c4e6e0dcd15743ae3352a09cca7c34b2f Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 11 Oct 2020 19:22:08 -0700 Subject: [PATCH 3/4] lint fixup --- pandas/io/stata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c9d29b66e8981..c128c56f496cc 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -889,7 +889,7 @@ def __init__(self): ) # error: Argument 1 to "list" has incompatible type "str"; # expected "Iterable[int]" [arg-type] - self.TYPE_MAP = list(range(251)) + list("bhlfd") # type:ignore [arg-type] + self.TYPE_MAP = list(range(251)) + list("bhlfd") # type: ignore[arg-type] self.TYPE_MAP_XML = dict( [ # Not really a Q, unclear how to handle byteswap From a24d9c22f71b178a88b6317c7ca5c32f22ae9006 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Oct 2020 10:54:21 -0700 Subject: [PATCH 4/4] update per comments --- pandas/core/reshape/concat.py | 5 ++--- pandas/io/excel/_base.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3283de9b9eca0..13052c23a9f11 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -3,7 +3,7 @@ """ from collections import abc -from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, cast, overload +from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload import numpy as np @@ -30,7 +30,7 @@ from pandas.core.internals import concatenate_block_managers if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import DataFrame from pandas.core.generic import NDFrame # --------------------------------------------------------------------- @@ -463,7 +463,6 @@ def get_result(self): if self.bm_axis == 0: name = com.consensus_name_attr(self.objs) cons = self.objs[0]._constructor - cons = cast("Series", cons) arrs = [ser._values for ser in self.objs] diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 0b6130a34666b..3461652f4ea24 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -3,7 +3,7 @@ from io import BufferedIOBase, BytesIO, RawIOBase import os from textwrap import fill -from typing import Any, Mapping, Tuple, Union +from typing import Any, Mapping, Union from pandas._config import config @@ -672,8 +672,11 @@ def __new__(cls, path, engine=None, **kwargs): curr_sheet = None path = None - supported_extensions: Tuple[str, ...] - # Exensions that writer engine supports; must be implemented by subclass + @property + @abc.abstractmethod + def supported_extensions(self): + """Extensions that writer engine supports.""" + pass @property @abc.abstractmethod @@ -789,8 +792,12 @@ def check_extension(cls, ext: str): """ if ext.startswith("."): ext = ext[1:] - exts = cls.supported_extensions - if not any(ext in extension for extension in exts): + # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" + # (not iterable) [attr-defined] + if not any( + ext in extension + for extension in cls.supported_extensions # type: ignore[attr-defined] + ): raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") else: return True