Skip to content

Add typos check to pre-commit hooks #10040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,9 @@ repos:
hooks:
- id: validate-pyproject
additional_dependencies: ["validate-pyproject-schema-store[all]"]
- repo: https://github.com/crate-ci/typos
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the typos project — it has worked great for us in PRQL. There's also a new-ish VSCode extension for it, which has been good.

rev: dictgen-v0.3.1
hooks:
- id: typos
# https://github.com/crate-ci/typos/issues/347
pass_filenames: false
2 changes: 1 addition & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class can be passed through the ``decode_times`` keyword argument (see also
coder = xr.coders.CFDatetimeCoder(time_unit="s")
ds = xr.open_dataset(filename, decode_times=coder)

Similar control of the resoution of decoded timedeltas can be achieved through
Similar control of the resolution of decoded timedeltas can be achieved through
passing a :py:class:`coders.CFTimedeltaCoder` instance to the
``decode_timedelta`` keyword argument:

Expand Down
87 changes: 67 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
[project]
authors = [
{ name = "xarray Developers", email = "[email protected]" },
]
authors = [{ name = "xarray Developers", email = "[email protected]" }]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: Apache Software License",
Expand All @@ -22,11 +20,7 @@ name = "xarray"
readme = "README.md"
requires-python = ">=3.10"

dependencies = [
"numpy>=1.24",
"packaging>=23.2",
"pandas>=2.1",
]
dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"]

# We don't encode minimum requirements here (though if we can write a script to
# generate the text from `min_deps_check.py`, that's welcome...). We do add
Expand Down Expand Up @@ -70,6 +64,7 @@ types = [
"types-PyYAML",
"types-Pygments",
"types-colorama",
"types-decorator",
"types-defusedxml",
"types-docutils",
"types-networkx",
Expand All @@ -93,10 +88,7 @@ dask = "xarray.namedarray.daskmanager:DaskManager"

[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=42",
"setuptools-scm>=7",
]
requires = ["setuptools>=42", "setuptools-scm>=7"]

[tool.setuptools]
packages = ["xarray"]
Expand All @@ -120,10 +112,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]

[tool.mypy]
enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"]
exclude = [
'build',
'xarray/util/generate_.*\.py',
]
exclude = ['build', 'xarray/util/generate_.*\.py']
files = "xarray"
show_error_context = true
warn_redundant_casts = true
Expand Down Expand Up @@ -254,10 +243,7 @@ module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"]
# reportMissingTypeStubs = false

[tool.ruff]
extend-exclude = [
"doc",
"_typed_ops.pyi",
]
extend-exclude = ["doc", "_typed_ops.pyi"]

[tool.ruff.lint]
extend-select = [
Expand Down Expand Up @@ -383,3 +369,64 @@ test = "pytest"
ignore = [
"PP308", # This option creates a large amount of log lines.
]

[tool.typos]

[tool.typos.default]
extend-ignore-identifiers-re = [
# Variable names
"nd_.*",
".*_nd",
"ba_.*",
".*_ba",
"ser_.*",
".*_ser",
# Function/class names
"NDArray.*",
".*NDArray.*",
]

[tool.typos.default.extend-words]
# NumPy function names
arange = "arange"

# Technical terms
nd = "nd"
nin = "nin"

# Variable names
ba = "ba"
ser = "ser"
fo = "fo"
iy = "iy"
vart = "vart"
ede = "ede"

# Organization/Institution names
Stichting = "Stichting"
Mathematisch = "Mathematisch"

# People's names
Soler = "Soler"
Bruning = "Bruning"
Tung = "Tung"
Claus = "Claus"
Celles = "Celles"
slowy = "slowy"
Commun = "Commun"

# Tests
Ome = "Ome"
SUR = "SUR"
Tio = "Tio"
Ono = "Ono"
abl = "abl"

# Technical terms
splitted = "splitted"
childs = "childs"
cutted = "cutted"
LOCA = "LOCA"

[tool.typos.type.jupyter]
extend-ignore-re = ["\"id\": \".*\""]
2 changes: 1 addition & 1 deletion xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ def reindex(
"""

# TODO: (benbovy - explicit indexes): uncomment?
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
# bad_keys = [k for k in indexers if k not in obj._indexes and k not in obj.dims]
# if bad_keys:
# raise ValueError(
Expand Down
8 changes: 4 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1973,8 +1973,8 @@ def reindex_like(
names to pandas.Index objects, which provides coordinates upon
which to index the variables in this dataset. The indexes on this
other object need not be the same as the indexes on this
dataset. Any mis-matched index values will be filled in with
NaN, and any mis-matched dimension names will simply be ignored.
dataset. Any mismatched index values will be filled in with
NaN, and any mismatched dimension names will simply be ignored.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional
Method to use for filling index values from other not found on this
data array:
Expand Down Expand Up @@ -2155,8 +2155,8 @@ def reindex(
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
arrays of coordinates tick labels. Any mismatched coordinate
values will be filled in with NaN, and any mismatched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
copy : bool, optional
Expand Down
8 changes: 4 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3364,8 +3364,8 @@ def reindex_like(
names to pandas.Index objects, which provides coordinates upon
which to index the variables in this dataset. The indexes on this
other object need not be the same as the indexes on this
dataset. Any mis-matched index values will be filled in with
NaN, and any mis-matched dimension names will simply be ignored.
dataset. Any mismatched index values will be filled in with
NaN, and any mismatched dimension names will simply be ignored.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional
Method to use for filling index values from other not found in this
dataset:
Expand Down Expand Up @@ -3430,8 +3430,8 @@ def reindex(
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
arrays of coordinates tick labels. Any mismatched coordinate
values will be filled in with NaN, and any mismatched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ def sel(
if any(ds != dim_size0 for ds in dim_size):
raise ValueError(
"CoordinateTransformIndex only supports advanced (point-wise) indexing "
"with xarray.DataArray or xarray.Variable objects of macthing dimensions."
"with xarray.DataArray or xarray.Variable objects of matching dimensions."
)

coord_labels = {
Expand Down
4 changes: 2 additions & 2 deletions xarray/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def unique_subset_of(
)


class CFTimeStategy(st.SearchStrategy):
class CFTimeStrategy(st.SearchStrategy):
def __init__(self, min_value, max_value):
self.min_value = min_value
self.max_value = max_value
Expand Down Expand Up @@ -506,5 +506,5 @@ def do_draw(self, data):
daysinmonth = date_type(99999, 12, 1).daysinmonth
min_value = date_type(-99999, 1, 1)
max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999)
strategy = CFTimeStategy(min_value, max_value)
strategy = CFTimeStrategy(min_value, max_value)
return strategy.do_draw(data)
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2576,7 +2576,7 @@
with self.roundtrip(original) as actual:
assert_identical(original, actual)

# but itermediate unaligned chunks are bad
# but intermediate unaligned chunks are bad
badenc = ds.chunk({"x": (3, 5, 3, 1)})
badenc.var1.encoding["chunks"] = (3,)
with pytest.raises(ValueError, match=r"would overlap multiple dask chunks"):
Expand Down Expand Up @@ -4185,7 +4185,7 @@
fx.create_dataset(k, data=v)
with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"):
with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds:
assert ds.dims == {

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4188 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
"phony_dim_0": 5,
"phony_dim_1": 5,
"phony_dim_2": 5,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,7 @@ def test_encode_cf_timedelta_casting_overflow_error(use_dask, dtype) -> None:

_DECODE_TIMEDELTA_TESTS = {
"default": (True, None, np.dtype("timedelta64[ns]"), True),
"decode_timdelta=False": (True, False, np.dtype("int64"), False),
"decode_timedelta=False": (True, False, np.dtype("int64"), False),
"inherit-time_unit-from-decode_times": (
CFDatetimeCoder(time_unit="s"),
None,
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> No
assert rechunked.chunksizes["time"] == expected
assert rechunked.chunksizes["x"] == (2,) * 5

def test_chunk_by_frequecy_errors(self):
def test_chunk_by_frequency_errors(self):
ds = Dataset({"foo": ("x", [1, 2, 3])})
with pytest.raises(ValueError, match="virtual variable"):
ds.chunk(x=TimeResampler("YE"))
Expand Down Expand Up @@ -2204,7 +2204,7 @@ def test_reindex(self) -> None:

# invalid dimension
# TODO: (benbovy - explicit indexes): uncomment?
# --> from reindex docstrings: "any mis-matched dimension is simply ignored"
# --> from reindex docstrings: "any mismatched dimension is simply ignored"
# with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"):
# data.reindex(invalid=0)

Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1602,7 +1602,7 @@ def test_filter_like(self) -> None:
assert filtered_tree.equals(barren_tree)
assert "flowers" not in filtered_tree.children

# test symetrical pruning results in isomorphic trees
# test symmetrical pruning results in isomorphic trees
assert flower_tree.filter_like(fruit_tree).isomorphic(
fruit_tree.filter_like(flower_tree)
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def test_rolling_construct_automatic_rechunk(self):

# Construct dataset with chunk size of (400, 400, 1) or 1.22 MiB
da = DataArray(
dims=["latitute", "longitude", "time"],
dims=["latitude", "longitude", "time"],
data=dask.array.random.random((400, 400, 400), chunks=(-1, -1, 1)),
name="foo",
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ def test_getitem_error(self):

v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
ind = Variable(["x"], [0, 1])
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
v[:, ind]

@pytest.mark.parametrize(
Expand Down
Loading