Skip to content

Add typos check to pre-commit hooks #10040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,9 @@ repos:
hooks:
- id: validate-pyproject
additional_dependencies: ["validate-pyproject-schema-store[all]"]
- repo: https://github.com/crate-ci/typos
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the typos project — it has worked great for us in PRQL. There's also a new-ish VSCode extension for it, which has been good.

rev: dictgen-v0.3.1
hooks:
- id: typos
# https://github.com/crate-ci/typos/issues/347
pass_filenames: false
2 changes: 1 addition & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class can be passed through the ``decode_times`` keyword argument (see also
coder = xr.coders.CFDatetimeCoder(time_unit="s")
ds = xr.open_dataset(filename, decode_times=coder)

Similar control of the resoution of decoded timedeltas can be achieved through
Similar control of the resolution of decoded timedeltas can be achieved through
passing a :py:class:`coders.CFTimedeltaCoder` instance to the
``decode_timedelta`` keyword argument:

Expand Down
87 changes: 67 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
[project]
authors = [
{ name = "xarray Developers", email = "[email protected]" },
]
authors = [{ name = "xarray Developers", email = "[email protected]" }]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: Apache Software License",
Expand All @@ -22,11 +20,7 @@ name = "xarray"
readme = "README.md"
requires-python = ">=3.10"

dependencies = [
"numpy>=1.24",
"packaging>=23.2",
"pandas>=2.1",
]
dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"]

# We don't encode minimum requirements here (though if we can write a script to
# generate the text from `min_deps_check.py`, that's welcome...). We do add
Expand Down Expand Up @@ -70,6 +64,7 @@ types = [
"types-PyYAML",
"types-Pygments",
"types-colorama",
"types-decorator",
"types-defusedxml",
"types-docutils",
"types-networkx",
Expand All @@ -93,10 +88,7 @@ dask = "xarray.namedarray.daskmanager:DaskManager"

[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=42",
"setuptools-scm>=7",
]
requires = ["setuptools>=42", "setuptools-scm>=7"]

[tool.setuptools]
packages = ["xarray"]
Expand All @@ -120,10 +112,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]

[tool.mypy]
enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
exclude = [
'build',
'xarray/util/generate_.*\.py',
]
exclude = ['build', 'xarray/util/generate_.*\.py']
files = "xarray"
show_error_context = true
warn_redundant_casts = true
Expand Down Expand Up @@ -254,10 +243,7 @@ module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"]
# reportMissingTypeStubs = false

[tool.ruff]
extend-exclude = [
"doc",
"_typed_ops.pyi",
]
extend-exclude = ["doc", "_typed_ops.pyi"]

[tool.ruff.lint]
extend-select = [
Expand Down Expand Up @@ -383,3 +369,64 @@ test = "pytest"
ignore = [
"PP308", # This option creates a large amount of log lines.
]

[tool.typos]

[tool.typos.default]
extend-ignore-identifiers-re = [
# Variable names
"nd_.*",
".*_nd",
"ba_.*",
".*_ba",
"ser_.*",
".*_ser",
# Function/class names
"NDArray.*",
".*NDArray.*",
]

[tool.typos.default.extend-words]
# NumPy function names
arange = "arange"

# Technical terms
nd = "nd"
nin = "nin"

# Variable names
ba = "ba"
ser = "ser"
fo = "fo"
iy = "iy"
vart = "vart"
ede = "ede"

# Organization/Institution names
Stichting = "Stichting"
Mathematisch = "Mathematisch"

# People's names
Soler = "Soler"
Bruning = "Bruning"
Tung = "Tung"
Claus = "Claus"
Celles = "Celles"
slowy = "slowy"
Commun = "Commun"

# Tests
Ome = "Ome"
SUR = "SUR"
Tio = "Tio"
Ono = "Ono"
abl = "abl"

# Technical terms
splitted = "splitted"
childs = "childs"
cutted = "cutted"
LOCA = "LOCA"

[tool.typos.type.jupyter]
extend-ignore-re = ["\"id\": \".*\""]
2 changes: 1 addition & 1 deletion xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ def reindex(
"""

# TODO: (benbovy - explicit indexes): uncomment?
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
# bad_keys = [k for k in indexers if k not in obj._indexes and k not in obj.dims]
# if bad_keys:
# raise ValueError(
Expand Down
8 changes: 4 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1973,8 +1973,8 @@ def reindex_like(
names to pandas.Index objects, which provides coordinates upon
which to index the variables in this dataset. The indexes on this
other object need not be the same as the indexes on this
dataset. Any mis-matched index values will be filled in with
NaN, and any mis-matched dimension names will simply be ignored.
dataset. Any mismatched index values will be filled in with
NaN, and any mismatched dimension names will simply be ignored.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional
Method to use for filling index values from other not found on this
data array:
Expand Down Expand Up @@ -2155,8 +2155,8 @@ def reindex(
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
arrays of coordinates tick labels. Any mismatched coordinate
values will be filled in with NaN, and any mismatched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
copy : bool, optional
Expand Down
8 changes: 4 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3364,8 +3364,8 @@ def reindex_like(
names to pandas.Index objects, which provides coordinates upon
which to index the variables in this dataset. The indexes on this
other object need not be the same as the indexes on this
dataset. Any mis-matched index values will be filled in with
NaN, and any mis-matched dimension names will simply be ignored.
dataset. Any mismatched index values will be filled in with
NaN, and any mismatched dimension names will simply be ignored.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional
Method to use for filling index values from other not found in this
dataset:
Expand Down Expand Up @@ -3430,8 +3430,8 @@ def reindex(
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
arrays of coordinates tick labels. Any mismatched coordinate
values will be filled in with NaN, and any mismatched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ def sel(
if any(ds != dim_size0 for ds in dim_size):
raise ValueError(
"CoordinateTransformIndex only supports advanced (point-wise) indexing "
"with xarray.DataArray or xarray.Variable objects of macthing dimensions."
"with xarray.DataArray or xarray.Variable objects of matching dimensions."
)

coord_labels = {
Expand Down
4 changes: 2 additions & 2 deletions xarray/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def unique_subset_of(
)


class CFTimeStategy(st.SearchStrategy):
class CFTimeStrategy(st.SearchStrategy):
def __init__(self, min_value, max_value):
self.min_value = min_value
self.max_value = max_value
Expand Down Expand Up @@ -506,5 +506,5 @@ def do_draw(self, data):
daysinmonth = date_type(99999, 12, 1).daysinmonth
min_value = date_type(-99999, 1, 1)
max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999)
strategy = CFTimeStategy(min_value, max_value)
strategy = CFTimeStrategy(min_value, max_value)
return strategy.do_draw(data)
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2576,7 +2576,7 @@
with self.roundtrip(original) as actual:
assert_identical(original, actual)

# but itermediate unaligned chunks are bad
# but intermediate unaligned chunks are bad
badenc = ds.chunk({"x": (3, 5, 3, 1)})
badenc.var1.encoding["chunks"] = (3,)
with pytest.raises(ValueError, match=r"would overlap multiple dask chunks"):
Expand Down Expand Up @@ -4185,7 +4185,7 @@
fx.create_dataset(k, data=v)
with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"):
with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds:
assert ds.dims == {

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
"phony_dim_0": 5,
"phony_dim_1": 5,
"phony_dim_2": 5,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,7 @@ def test_encode_cf_timedelta_casting_overflow_error(use_dask, dtype) -> None:

_DECODE_TIMEDELTA_TESTS = {
"default": (True, None, np.dtype("timedelta64[ns]"), True),
"decode_timdelta=False": (True, False, np.dtype("int64"), False),
"decode_timedelta=False": (True, False, np.dtype("int64"), False),
"inherit-time_unit-from-decode_times": (
CFDatetimeCoder(time_unit="s"),
None,
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> No
assert rechunked.chunksizes["time"] == expected
assert rechunked.chunksizes["x"] == (2,) * 5

def test_chunk_by_frequecy_errors(self):
def test_chunk_by_frequency_errors(self):
ds = Dataset({"foo": ("x", [1, 2, 3])})
with pytest.raises(ValueError, match="virtual variable"):
ds.chunk(x=TimeResampler("YE"))
Expand Down Expand Up @@ -2204,7 +2204,7 @@ def test_reindex(self) -> None:

# invalid dimension
# TODO: (benbovy - explicit indexes): uncomment?
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
# with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"):
# data.reindex(invalid=0)

Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1602,7 +1602,7 @@ def test_filter_like(self) -> None:
assert filtered_tree.equals(barren_tree)
assert "flowers" not in filtered_tree.children

# test symetrical pruning results in isomorphic trees
# test symmetrical pruning results in isomorphic trees
assert flower_tree.filter_like(fruit_tree).isomorphic(
fruit_tree.filter_like(flower_tree)
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def test_rolling_construct_automatic_rechunk(self):

# Construct dataset with chunk size of (400, 400, 1) or 1.22 MiB
da = DataArray(
dims=["latitute", "longitude", "time"],
dims=["latitude", "longitude", "time"],
data=dask.array.random.random((400, 400, 400), chunks=(-1, -1, 1)),
name="foo",
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ def test_getitem_error(self):

v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
ind = Variable(["x"], [0, 1])
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
v[:, ind]

@pytest.mark.parametrize(
Expand Down
Loading