From 60120d2ccb2e36c85348c372566ff2f6eddd481c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 7 Apr 2025 10:19:18 -0700 Subject: [PATCH 1/5] Bump pre-commit version, bump clang-format and meson --- .pre-commit-config.yaml | 10 +++++----- environment.yml | 2 +- requirements-dev.txt | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09bfda1755e03..4f055dc2e0821 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -minimum_pre_commit_version: 2.15.0 +minimum_pre_commit_version: 4.2.0 exclude: ^LICENSES/|\.(html|csv|svg)$ # reserve "manual" for relatively slow hooks which we still want to run in CI default_stages: [ @@ -25,7 +25,7 @@ repos: args: [--exit-non-zero-on-fix] exclude: ^pandas/tests/frame/test_query_eval.py - id: ruff - # TODO: remove autofixe-only rules when they are checked by ruff + # TODO: remove autofix only rules when they are checked by ruff name: ruff-selected-autofixes alias: ruff-selected-autofixes files: ^pandas @@ -34,7 +34,7 @@ repos: - id: ruff-format exclude: ^scripts|^pandas/tests/frame/test_query_eval.py - repo: https://github.com/jendrikseipp/vulture - rev: 'v2.14' + rev: v2.14 hooks: - id: vulture entry: python scripts/run_vulture.py @@ -95,14 +95,14 @@ repos: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.7 + rev: v20.1.0 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include args: [-i] types_or: [c, c++] - repo: https://github.com/trim21/pre-commit-mirror-meson - rev: v1.7.0 + rev: v1.7.2 hooks: - id: meson-fmt args: ['--inplace'] diff --git a/environment.yml b/environment.yml index ca8f1996c61cf..0c170d05316f6 100644 --- a/environment.yml +++ b/environment.yml @@ -80,7 +80,7 @@ dependencies: - flake8=7.1.0 # run in subprocess over docstring examples - mypy=1.13.0 # pre-commit uses locally installed mypy - tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py - - pre-commit>=4.0.1 + - pre-commit>=4.2.0 # documentation - gitpython # obtain contributors from git for whatsnew diff --git a/requirements-dev.txt b/requirements-dev.txt index 20fc21be75a06..c386a5a9c8c6e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -57,7 +57,7 @@ asv>=0.6.1 flake8==7.1.0 mypy==1.13.0 tokenize-rt -pre-commit>=4.0.1 +pre-commit>=4.2.0 gitpython gitdb google-auth From 659c83b0acb5ca48f4c464855847ac5fb1bcc7cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 7 Apr 2025 10:39:23 -0700 Subject: [PATCH 2/5] Fix type checking abbreviation --- pyproject.toml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7d53b0d8934a..d3bcd7bdb0f05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -234,8 +234,8 @@ select = [ "TID", # implicit string concatenation "ISC", - # type-checking imports - "TCH", + # flake8-type-checking + "TC", # comprehensions "C4", # pygrep-hooks @@ -390,6 +390,8 @@ ignore = [ "PLW0108", # global-statement "PLW0603", + # runtime-cast-value + "TC006", ] exclude = [ @@ -429,7 +431,7 @@ exclude = [ "pandas/tests/*" = ["B028", "FLY"] "scripts/*" = ["B028"] # Keep this one enabled -"pandas/_typing.py" = ["TCH"] +"pandas/_typing.py" = ["TC"] [tool.ruff.lint.flake8-pytest-style] fixture-parentheses = false From d92ce07cef4805d286350fa342a7bd43da7e9c42 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 7 Apr 2025 10:45:18 -0700 Subject: [PATCH 3/5] Bump to 0.11.4 --- .pre-commit-config.yaml | 2 +- asv_bench/benchmarks/frame_methods.py | 2 +- pandas/_libs/tslibs/timedeltas.pyi | 6 ++---- pandas/core/apply.py | 2 +- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 4 ++-- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/io/formats/style_render.py | 6 +++--- pandas/io/pytables.py | 6 +++--- pandas/io/sql.py | 6 +++--- pandas/tests/apply/test_frame_apply.py | 4 ++-- pandas/tests/dtypes/cast/test_maybe_box_native.py | 2 +- pandas/tests/io/parser/test_na_values.py | 2 +- pandas/tests/series/accessors/test_cat_accessor.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- setup.py | 2 +- 21 files changed, 30 insertions(+), 32 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f055dc2e0821..0496545bde40d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.9 + rev: v0.11.4 hooks: - id: ruff args: [--exit-non-zero-on-fix] diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 6a2ab24df26fe..cd7851acae3f2 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -517,7 +517,7 @@ def setup(self): self.df = DataFrame(np.random.randn(1000, 100)) self.s = Series(np.arange(1028.0)) - self.df2 = DataFrame({i: self.s for i in range(1028)}) + self.df2 = DataFrame(dict.fromkeys(range(1028), self.s)) self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) def time_apply_user_func(self): diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 979a5666661b2..c885543b2fc6d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -3,7 +3,6 @@ from typing import ( ClassVar, Literal, TypeAlias, - TypeVar, overload, ) @@ -60,7 +59,6 @@ UnitChoices: TypeAlias = Literal[ "nanos", "nanosecond", ] -_S = TypeVar("_S", bound=timedelta) def get_unit_for_round(freq, creso: int) -> int: ... def disallow_ambiguous_unit(unit: str | None) -> None: ... @@ -95,11 +93,11 @@ class Timedelta(timedelta): _value: int # np.int64 # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") def __new__( # type: ignore[misc] - cls: type[_S], + cls: type[Self], value=..., unit: str | None = ..., **kwargs: float | np.integer | np.floating, - ) -> _S | NaTType: ... + ) -> Self | NaTType: ... @classmethod def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ... @property diff --git a/pandas/core/apply.py b/pandas/core/apply.py index da6124307e3f1..2c96f1ef020ac 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -327,7 +327,7 @@ def transform(self) -> DataFrame | Series: if is_series: func = {com.get_callable_name(v) or v: v for v in func} else: - func = {col: func for col in obj} + func = dict.fromkeys(obj, func) if is_dict_like(func): func = cast(AggFuncTypeDict, func) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index d35083fd892a8..a39d64429d162 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -281,7 +281,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: ] # short-circuit to return all False array. - if not len(value_set): + if not value_set: return np.zeros(len(self), dtype=bool) result = pc.is_in( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6a45ef9325bec..884107d4bc6af 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9705,7 +9705,7 @@ def _where( # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: cond = cond._constructor_expanddim( - {i: cond for i in range(len(self.columns))}, + dict.fromkeys(range(len(self.columns)), cond), copy=False, ) cond.columns = self.columns diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1251403db6ff3..b520ad69aae96 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2505,7 +2505,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame: ) results = [func(sgb) for sgb in sgbs] - if not len(results): + if not results: # concat would raise res_df = DataFrame([], columns=columns, index=self._grouper.result_index) else: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9438b348c140..d31e50bbd311b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5175,8 +5175,8 @@ def diff( shifted = shifted.astype("float32") else: to_coerce = [c for c, dtype in obj.dtypes.items() if dtype in dtypes_to_f32] - if len(to_coerce): - shifted = shifted.astype({c: "float32" for c in to_coerce}) + if to_coerce: + shifted = shifted.astype(dict.fromkeys(to_coerce, "float32")) return obj - shifted diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dc64da35e9725..b846af1c83736 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -805,7 +805,7 @@ def replace_list( for x, y in zip(src_list, dest_list) if (self._can_hold_element(x) or (self.dtype == "string" and is_re(x))) ] - if not len(pairs): + if not pairs: return [self.copy(deep=False)] src_len = len(pairs) - 1 diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 69da2be0306f6..d098f8d42d3db 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -864,7 +864,7 @@ def _finalize_columns_and_data( # GH#26429 do not raise user-facing AssertionError raise ValueError(err) from err - if len(contents) and contents[0].dtype == np.object_: + if contents and contents[0].dtype == np.object_: contents = convert_object_array(contents, dtype=dtype) return contents, columns diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a3738bb25f56c..e238bb78bbdfa 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1298,7 +1298,7 @@ def value_getitem(placement): # Defer setting the new values to enable consolidation self._iset_split_block(blkno_l, blk_locs, refs=refs) - if len(removed_blknos): + if removed_blknos: # Remove blocks & update blknos accordingly is_deleted = np.zeros(self.nblocks, dtype=np.bool_) is_deleted[removed_blknos] = True diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fb799361fea67..f1be0b41ad7f7 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -566,7 +566,7 @@ def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceTyp result = {} elif isinstance(col_space, (int, str)): result = {"": col_space} - result.update({column: col_space for column in self.frame.columns}) + result.update(dict.fromkeys(self.frame.columns, col_space)) elif isinstance(col_space, Mapping): for column in col_space.keys(): if column not in self.frame.columns and column != "": diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 482ed316c7ce4..6752c83d5169b 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1225,7 +1225,7 @@ def format( data = self.data.loc[subset] if not isinstance(formatter, dict): - formatter = {col: formatter for col in data.columns} + formatter = dict.fromkeys(data.columns, formatter) cis = self.columns.get_indexer_for(data.columns) ris = self.index.get_indexer_for(data.index) @@ -1411,7 +1411,7 @@ def format_index( return self # clear the formatter / revert to default and avoid looping if not isinstance(formatter, dict): - formatter = {level: formatter for level in levels_} + formatter = dict.fromkeys(levels_, formatter) else: formatter = { obj._get_level_number(level): formatter_ @@ -1708,7 +1708,7 @@ def format_index_names( return self # clear the formatter / revert to default and avoid looping if not isinstance(formatter, dict): - formatter = {level: formatter for level in levels_} + formatter = dict.fromkeys(levels_, formatter) else: formatter = { obj._get_level_number(level): formatter_ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a689cfbcb1418..b83b5aba3cf13 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1760,7 +1760,7 @@ def info(self) -> str: if self.is_open: lkeys = sorted(self.keys()) - if len(lkeys): + if lkeys: keys = [] values = [] @@ -4540,7 +4540,7 @@ def write_data(self, chunksize: int | None, dropna: bool = False) -> None: masks.append(mask.astype("u1", copy=False)) # consolidate masks - if len(masks): + if masks: mask = masks[0] for m in masks[1:]: mask = mask & m @@ -4660,7 +4660,7 @@ def delete( groups = list(diff[diff > 1].index) # 1 group - if not len(groups): + if not groups: groups = [0] # final element diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0e0f07c0f8ff3..31dd3b6b0ebda 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1901,7 +1901,7 @@ def prep_table( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] else: dtype = cast(dict, dtype) @@ -2615,7 +2615,7 @@ def _create_table_setup(self): ] ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index] - if len(ix_cols): + if ix_cols: cnames = "_".join(ix_cols) cnames_br = ",".join([escape(c) for c in ix_cols]) create_stmts.append( @@ -2859,7 +2859,7 @@ def to_sql( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] else: dtype = cast(dict, dtype) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 2d47cd851ad10..dde1158dc7951 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -334,7 +334,7 @@ def test_apply_broadcast_scalars(float_frame): def test_apply_broadcast_scalars_axis1(float_frame): result = float_frame.apply(np.mean, axis=1, result_type="broadcast") m = float_frame.mean(axis=1) - expected = DataFrame({c: m for c in float_frame.columns}) + expected = DataFrame(dict.fromkeys(float_frame.columns, m)) tm.assert_frame_equal(result, expected) @@ -361,7 +361,7 @@ def test_apply_broadcast_lists_index(float_frame): ) m = list(range(len(float_frame.index))) expected = DataFrame( - {c: m for c in float_frame.columns}, + dict.fromkeys(float_frame.columns, m), dtype="float64", index=float_frame.index, ) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index 3f62f31dac219..151586962d517 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -17,7 +17,7 @@ "obj,expected_dtype", [ (b"\x00\x10", bytes), - (int(4), int), + ((4), int), (np.uint(4), int), (np.int32(-4), int), (np.uint8(4), int), diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 3a68d38cc0bde..213fa2c01cef4 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -546,7 +546,7 @@ def test_na_values_dict_null_column_name(all_parsers): parser = all_parsers data = ",x,y\n\nMA,1,2\nNA,2,1\nOA,,3" names = [None, "x", "y"] - na_values = {name: STR_NA_VALUES for name in names} + na_values = dict.fromkeys(names, STR_NA_VALUES) dtype = {None: "object", "x": "float64", "y": "float64"} if parser.engine == "pyarrow": diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index ce8ea27ea1fa2..f017ccd963972 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -40,7 +40,7 @@ def test_getname_categorical_accessor(self, method): def test_cat_accessor(self): ser = Series(Categorical(["a", "b", np.nan, "a"])) tm.assert_index_equal(ser.cat.categories, Index(["a", "b"])) - assert not ser.cat.ordered, False + assert not ser.cat.ordered exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5f4a100e7ccc7..f82451a2be84d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -632,7 +632,7 @@ def test_constructor_maskedarray_hardened(self): def test_series_ctor_plus_datetimeindex(self): rng = date_range("20090415", "20090519", freq="B") - data = {k: 1 for k in rng} + data = dict.fromkeys(rng, 1) result = Series(data, index=rng) assert result.index.is_(rng) diff --git a/setup.py b/setup.py index 737ebd270d1e4..db1852b43cfa9 100755 --- a/setup.py +++ b/setup.py @@ -364,7 +364,7 @@ def run(self) -> None: # enable coverage by building cython files by setting the environment variable # "PANDAS_CYTHON_COVERAGE" (with a Truthy value) or by running build_ext # with `--with-cython-coverage`enabled -linetrace = os.environ.get("PANDAS_CYTHON_COVERAGE", False) +linetrace = os.environ.get("PANDAS_CYTHON_COVERAGE", False) # noqa: PLW1508 if "--with-cython-coverage" in sys.argv: linetrace = True sys.argv.remove("--with-cython-coverage") From 005b1ab2ce470bc4f97d39be5e4d7d58c08d44ba Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 7 Apr 2025 10:50:06 -0700 Subject: [PATCH 4/5] Put minimum version at 4 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0496545bde40d..5308c98e96937 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -minimum_pre_commit_version: 4.2.0 +minimum_pre_commit_version: 4.0.0 exclude: ^LICENSES/|\.(html|csv|svg)$ # reserve "manual" for relatively slow hooks which we still want to run in CI default_stages: [ From 7a372ecdd6c907275b23cd33ff3bddf049d81fe1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 7 Apr 2025 13:21:54 -0700 Subject: [PATCH 5/5] Change misc to arg-type --- pandas/io/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 31dd3b6b0ebda..7376843f7e8ff 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1901,7 +1901,7 @@ def prep_table( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[arg-type] else: dtype = cast(dict, dtype) @@ -2859,7 +2859,7 @@ def to_sql( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[arg-type] else: dtype = cast(dict, dtype)