diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8230514..c25eafb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,13 +11,13 @@ repos: pass_filenames: false # ruff check (w/autofix) - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 # should match version in pyproject.toml + rev: v0.8.4 # should match version in pyproject.toml hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] # ruff format - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 # should match version in pyproject.toml + rev: v0.8.4 # should match version in pyproject.toml hooks: - id: ruff-format # # pydoclint - docstring formatting diff --git a/benchmarks/plot.ipynb b/benchmarks/plot.ipynb index 9cf0eb1..0642d70 100644 --- a/benchmarks/plot.ipynb +++ b/benchmarks/plot.ipynb @@ -8,7 +8,6 @@ "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", - "import numpy as np\n", "\n", "plt.style.use(\"ggplot\")" ] @@ -16,16 +15,6 @@ { "cell_type": "code", "execution_count": null, - "id": "c92bf960-ddb5-409f-bd3c-5bce0a03ccd0", - "metadata": {}, - "outputs": [], - "source": [ - "from sequentia import" - ] - }, - { - "cell_type": "code", - "execution_count": 79, "id": "6649bf2d-7430-401d-8113-f3c1e1cf4779", "metadata": {}, "outputs": [ @@ -48,23 +37,36 @@ "\n", "bars = ax.bar(labels, runtimes, width=0.5, color=\"C1\")\n", "ax.set(xlabel=\"Package\", ylabel=\"Runtime (s)\")\n", - "ax.set_title(\"Univariate DTW-kNN performance (1,500 FSDD train/test sequences, 16 workers)\", fontsize=11)\n", + "ax.set_title(\n", + " (\n", + " \"Univariate DTW-kNN performance \"\n", + " \"(1,500 FSDD train/test sequences, 16 workers)\"\n", + " ),\n", + " fontsize=11,\n", + ")\n", + "\n", "\n", "def fmt(s: float) -> str:\n", + " \"\"\"Formats the runtime.\"\"\"\n", " if s < 60:\n", " return f\"{round(s)}s\"\n", " m, s = divmod(s, 60)\n", " return f\"{round(m)}m {round(s)}s\"\n", "\n", + "\n", "for bar in bars:\n", " plt.text(\n", - " bar.get_x() + bar.get_width() / 2, bar.get_height(),\n", - " fmt(bar.get_height()), ha='center', va='bottom', fontsize=9,\n", + " bar.get_x() + bar.get_width() / 2,\n", + " bar.get_height(),\n", + " fmt(bar.get_height()),\n", + " ha=\"center\",\n", + " va=\"bottom\",\n", + " fontsize=9,\n", " )\n", "\n", "for lab in ax.get_xticklabels():\n", - " if lab.get_text() == \"sequentia\":\n", - " lab.set_fontweight('bold')\n", + " if lab.get_text() == \"sequentia\":\n", + " lab.set_fontweight(\"bold\")\n", "\n", "plt.tight_layout()\n", "plt.savefig(\"benchmark.svg\")\n", diff --git a/make/lint.py b/make/lint.py index f09157f..17151ef 100644 --- a/make/lint.py +++ b/make/lint.py @@ -33,7 +33,7 @@ def check(c: Config) -> None: def format_(c: Config) -> None: """Format Python files.""" commands: list[str] = [ - "poetry run ruff --fix .", + "poetry run ruff check --fix .", "poetry run ruff format .", ] for command in commands: diff --git a/pyproject.toml b/pyproject.toml index 37531f5..55f9311 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ tox = "4.11.3" pre-commit = ">=3" [tool.poetry.group.lint.dependencies] -ruff = "0.1.3" +ruff = "0.8.4" pydoclint = "0.3.8" [tool.poetry.group.docs.dependencies] @@ -100,8 +100,8 @@ pytest = { version = "^7.4.0" } pytest-cov = { version = "^4.1.0" } [tool.ruff] -required-version = "0.1.3" -select = [ +required-version = "0.8.4" +lint.select = [ "F", # pyflakes: https://pypi.org/project/pyflakes/ "E", # pycodestyle (error): https://pypi.org/project/pycodestyle/ "W", # pycodestyle (warning): https://pypi.org/project/pycodestyle/ @@ -144,7 +144,7 @@ select = [ "PERF", # perflint: https://pypi.org/project/perflint/ "RUF", # ruff ] -ignore = [ +lint.ignore = [ "ANN401", # https://beta.ruff.rs/docs/rules/any-type/ "B905", # https://beta.ruff.rs/docs/rules/zip-without-explicit-strict/ "TD003", # https://beta.ruff.rs/docs/rules/missing-todo-link/ @@ -162,16 +162,15 @@ ignore = [ "C408", # Unnecessary `dict` call (rewrite as a literal) "D401", # First line of docstring should be in imperative mood ] -ignore-init-module-imports = true # allow unused imports in __init__.py line-length = 79 -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.flake8-annotations] +[tool.ruff.lint.flake8-annotations] allow-star-arg-any = true -[tool.ruff.extend-per-file-ignores] +[tool.ruff.lint.extend-per-file-ignores] "__init__.py" = ["PLC0414", "F403", "F401", "F405"] "sequentia/datasets/*.py" = ["B006"] "sequentia/enums.py" = ["E501"] diff --git a/sequentia/_internal/_hmm/topologies.py b/sequentia/_internal/_hmm/topologies.py index e819182..c74278f 100644 --- a/sequentia/_internal/_hmm/topologies.py +++ b/sequentia/_internal/_hmm/topologies.py @@ -13,10 +13,10 @@ from sequentia.enums import TopologyMode __all__ = [ + "TOPOLOGY_MAP", "ErgodicTopology", "LeftRightTopology", "LinearTopology", - "TOPOLOGY_MAP", ] @@ -36,15 +36,15 @@ class BaseTopology: mode: TopologyMode def __init__( - self: BaseTopology, + self, *, n_states: int, random_state: np.random.RandomState, - ) -> BaseTopology: + ) -> None: self.n_states = n_states self.random_state = random_state - def uniform_start_probs(self: BaseTopology) -> FloatArray: + def uniform_start_probs(self) -> FloatArray: """Set the initial state distribution as a discrete uniform distribution. @@ -55,7 +55,7 @@ def uniform_start_probs(self: BaseTopology) -> FloatArray: """ return np.ones(self.n_states) / self.n_states - def random_start_probs(self: BaseTopology) -> FloatArray: + def random_start_probs(self) -> FloatArray: """Set the initial state distribution by randomly sampling probabilities generated by a Dirichlet distribution. @@ -69,7 +69,7 @@ def random_start_probs(self: BaseTopology) -> FloatArray: size=1, ).flatten() - def uniform_transition_probs(self: BaseTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -81,7 +81,7 @@ def uniform_transition_probs(self: BaseTopology) -> FloatArray: """ raise NotImplementedError - def random_transition_probs(self: BaseTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilitiesfrom a Dirichlet distribution - according @@ -94,7 +94,7 @@ def random_transition_probs(self: BaseTopology) -> FloatArray: """ raise NotImplementedError - def check_start_probs(self: BaseTopology, initial: FloatArray, /) -> None: + def check_start_probs(self, initial: FloatArray, /) -> None: """Validate an initial state distribution according to the topology's restrictions. @@ -114,9 +114,7 @@ def check_start_probs(self: BaseTopology, initial: FloatArray, /) -> None: raise ValueError(msg) return initial - def check_transition_probs( - self: BaseTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -152,7 +150,7 @@ class ErgodicTopology(BaseTopology): mode: TopologyMode = TopologyMode.ERGODIC - def uniform_transition_probs(self: ErgodicTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -164,7 +162,7 @@ def uniform_transition_probs(self: ErgodicTopology) -> FloatArray: """ return np.ones((self.n_states, self.n_states)) / self.n_states - def random_transition_probs(self: ErgodicTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution - according @@ -180,9 +178,7 @@ def random_transition_probs(self: ErgodicTopology) -> FloatArray: size=self.n_states, ) - def check_transition_probs( - self: ErgodicTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -216,7 +212,7 @@ class LeftRightTopology(BaseTopology): mode: TopologyMode = TopologyMode.LEFT_RIGHT - def uniform_transition_probs(self: LeftRightTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -233,7 +229,7 @@ def uniform_transition_probs(self: LeftRightTopology) -> FloatArray: lower_ones = np.tril(np.ones(self.n_states), k=-1) return upper_ones / (upper_divisors + lower_ones) - def random_transition_probs(self: LeftRightTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution, according @@ -249,9 +245,7 @@ def random_transition_probs(self: LeftRightTopology) -> FloatArray: row[i:] = self.random_state.dirichlet(np.ones(self.n_states - i)) return transitions - def check_transition_probs( - self: LeftRightTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -281,7 +275,7 @@ class LinearTopology(LeftRightTopology): mode: TopologyMode = TopologyMode.LINEAR - def uniform_transition_probs(self: LinearTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -297,7 +291,7 @@ def uniform_transition_probs(self: LinearTopology) -> FloatArray: row[i : (i + size)] = np.ones(size) / size return transitions - def random_transition_probs(self: LinearTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution, according to the @@ -314,9 +308,7 @@ def random_transition_probs(self: LinearTopology) -> FloatArray: row[i : (i + size)] = self.random_state.dirichlet(np.ones(size)) return transitions - def check_transition_probs( - self: LinearTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. diff --git a/sequentia/_internal/_typing.py b/sequentia/_internal/_typing.py index a537bfa..2e37a98 100644 --- a/sequentia/_internal/_typing.py +++ b/sequentia/_internal/_typing.py @@ -6,7 +6,7 @@ import numpy as np import numpy.typing as npt -__all__ = ["FloatArray", "IntArray", "Array"] +__all__ = ["Array", "FloatArray", "IntArray"] FloatArray = npt.NDArray[np.float64] IntArray = npt.NDArray[np.int64] diff --git a/sequentia/_internal/_validation.py b/sequentia/_internal/_validation.py index 911e881..89f26cb 100644 --- a/sequentia/_internal/_validation.py +++ b/sequentia/_internal/_validation.py @@ -20,15 +20,15 @@ from sequentia._internal._typing import Array, FloatArray, IntArray __all__ = [ - "check_random_state", - "check_is_fitted", - "requires_fit", - "check_classes", "check_X", "check_X_lengths", - "check_y", - "check_weighting", + "check_classes", + "check_is_fitted", + "check_random_state", "check_use_c", + "check_weighting", + "check_y", + "requires_fit", ] @@ -60,7 +60,7 @@ def check_is_fitted( def requires_fit(function: t.Callable) -> t.Callable: @functools.wraps(function) - def wrapper(self: t.Self, *args: t.Any, **kwargs: t.Any) -> t.Any: + def wrapper(self, *args: t.Any, **kwargs: t.Any) -> t.Any: # noqa: ANN001 check_is_fitted(self) return function(self, *args, **kwargs) @@ -106,14 +106,14 @@ def check_X( if not isinstance(X, np.ndarray): try: X = np.array(X).astype(dtype) - except Exception as e: # noqa: BLE001 + except Exception as e: type_ = type(X).__name__ msg = f"Expected value to be a numpy.ndarray, got {type_!r}" raise TypeError(msg) from e if (dtype_ := X.dtype) != dtype: try: X = X.astype(dtype) - except Exception as e: # noqa: BLE001 + except Exception as e: msg = f"Expected array to have dtype {dtype}, got {dtype_}" raise TypeError(msg) from e if (ndim_ := X.ndim) != 2: @@ -214,7 +214,7 @@ def check_weighting( if x.shape != weights.shape: msg = "Weights should have the same shape as inputs" raise ValueError(msg) # noqa: TRY301 - except Exception as e: # noqa: BLE001 + except Exception as e: msg = "Invalid weighting function" raise ValueError(msg) from e diff --git a/sequentia/datasets/__init__.py b/sequentia/datasets/__init__.py index 30d9ea9..817b0a2 100644 --- a/sequentia/datasets/__init__.py +++ b/sequentia/datasets/__init__.py @@ -10,4 +10,4 @@ from sequentia.datasets.digits import load_digits from sequentia.datasets.gene_families import load_gene_families -__all__ = ["data", "load_digits", "load_gene_families", "SequentialDataset"] +__all__ = ["SequentialDataset", "data", "load_digits", "load_gene_families"] diff --git a/sequentia/datasets/base.py b/sequentia/datasets/base.py index 2ae5de4..4bc096b 100644 --- a/sequentia/datasets/base.py +++ b/sequentia/datasets/base.py @@ -26,19 +26,17 @@ class SequentialDataset: """Utility wrapper for a generic sequential dataset.""" def __init__( - self: SequentialDataset, + self, X: Array, y: Array | None = None, *, lengths: IntArray | None = None, classes: list[int] | None = None, - ) -> SequentialDataset: + ) -> None: """Initialize a :class:`.SequentialDataset`. Parameters ---------- - self: SequentialDataset - X: Sequence(s). @@ -80,17 +78,21 @@ def __init__( self._idxs = _data.get_idxs(self.lengths) def split( - self: SequentialDataset, + self, *, test_size: ( - pyd.NonNegativeInt | pyd.confloat(ge=0, le=1) | None - ) = None, # placeholder + pyd.NonNegativeInt + | t.Annotated[float, pyd.Field(ge=0, le=1)] + | None + ) = None, train_size: ( - pyd.NonNegativeInt | pyd.confloat(ge=0, le=1) | None - ) = None, # placeholder + pyd.NonNegativeInt + | t.Annotated[float, pyd.Field(ge=0, le=1)] + | None + ) = None, random_state: ( pyd.NonNegativeInt | np.random.RandomState | None - ) = None, # placeholder + ) = None, shuffle: bool = True, stratify: bool = False, ) -> tuple[SequentialDataset, SequentialDataset]: @@ -100,8 +102,6 @@ def split( Parameters ---------- - self: SequentialDataset - test_size: Size of the test partition. @@ -171,9 +171,7 @@ def split( return data_train, data_test - def iter_by_class( - self: SequentialDataset, - ) -> t.Generator[tuple[Array, Array, int]]: + def iter_by_class(self) -> t.Generator[tuple[Array, Array, int]]: """Subset the observation sequences by class. Returns @@ -207,24 +205,18 @@ def iter_by_class( lengths = self._lengths[ind] yield np.vstack(X), lengths, c - def __len__(self: SequentialDataset) -> int: + def __len__(self) -> int: """Return the number of sequences in the dataset.""" return len(self._lengths) - def __getitem__( - self: SequentialDataset, - /, - i: int, - ) -> Array | tuple[Array, Array]: + def __getitem__(self, /, i: int) -> Array | tuple[Array, Array]: """Slice observation sequences and corresponding outputs.""" idxs = np.atleast_2d(self._idxs[i]) X = list(_data.iter_X(self._X, idxs=idxs)) X = X[0] if isinstance(i, int) and len(X) == 1 else X return X if self._y is None else (X, self._y[i]) - def __iter__( - self: SequentialDataset, - ) -> t.Generator[Array | tuple[Array, Array]]: + def __iter__(self) -> t.Generator[Array | tuple[Array, Array]]: """Create a generator over sequences and their corresponding outputs. """ @@ -232,7 +224,7 @@ def __iter__( yield self[i] @property - def X(self: SequentialDataset) -> Array: + def X(self) -> Array: """Observation sequences. Returns @@ -243,7 +235,7 @@ def X(self: SequentialDataset) -> Array: return self._X @property - def y(self: SequentialDataset) -> Array: + def y(self) -> Array: """Outputs corresponding to ``X``. Returns @@ -262,7 +254,7 @@ def y(self: SequentialDataset) -> Array: return self._y @property - def lengths(self: SequentialDataset) -> IntArray: + def lengths(self) -> IntArray: """Lengths corresponding to ``X``. Returns @@ -273,7 +265,7 @@ def lengths(self: SequentialDataset) -> IntArray: return self._lengths @property - def classes(self: SequentialDataset) -> IntArray | None: + def classes(self) -> IntArray | None: """Set of unique classes in ``y``. Returns @@ -284,7 +276,7 @@ def classes(self: SequentialDataset) -> IntArray | None: return self._classes @property - def idxs(self: SequentialDataset) -> IntArray: + def idxs(self) -> IntArray: """Observation sequence start and end indices. Returns @@ -295,7 +287,7 @@ def idxs(self: SequentialDataset) -> IntArray: return self._idxs @property - def X_y(self: SequentialDataset) -> dict[str, Array]: + def X_y(self) -> dict[str, Array]: """Observation sequences and corresponding outputs. Returns @@ -317,7 +309,7 @@ def X_y(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "y": self._y} @property - def X_lengths(self: SequentialDataset) -> dict[str, Array]: + def X_lengths(self) -> dict[str, Array]: """Observation sequences and corresponding lengths. Returns @@ -331,7 +323,7 @@ def X_lengths(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "lengths": self._lengths} @property - def X_y_lengths(self: SequentialDataset) -> dict[str, Array]: + def X_y_lengths(self) -> dict[str, Array]: """Observation sequences and corresponding outputs and lengths. Returns @@ -354,7 +346,7 @@ def X_y_lengths(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "y": self._y, "lengths": self._lengths} def save( - self: SequentialDataset, + self, path: str | pathlib.Path | t.IO, /, *, @@ -389,9 +381,7 @@ def save( save_fun(path, **arrs) @classmethod - def load( - cls: type[SequentialDataset], path: str | pathlib.Path | t.IO, / - ) -> SequentialDataset: + def load(cls, path: str | pathlib.Path | t.IO, /) -> SequentialDataset: """Load a stored dataset in ``.npz`` format. See :func:`numpy:numpy.load`. @@ -413,7 +403,7 @@ def load( """ return cls(**np.load(path)) - def copy(self: SequentialDataset) -> SequentialDataset: + def copy(self) -> SequentialDataset: """Create a copy of the dataset. Returns diff --git a/sequentia/datasets/digits.py b/sequentia/datasets/digits.py index e904d63..7fbca04 100644 --- a/sequentia/datasets/digits.py +++ b/sequentia/datasets/digits.py @@ -9,6 +9,7 @@ import importlib.resources import operator +import typing as t import numpy as np import pydantic as pyd @@ -22,7 +23,19 @@ @pyd.validate_call def load_digits( - *, digits: set[pyd.conint(ge=0, le=9)] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + *, + digits: set[t.Annotated[int, pyd.Field(ge=0, le=9)]] = { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + }, ) -> SequentialDataset: """Load a dataset of MFCC features of spoken digit audio samples from the Free Spoken Digit Dataset. diff --git a/sequentia/datasets/gene_families.py b/sequentia/datasets/gene_families.py index b2786f7..54559ff 100644 --- a/sequentia/datasets/gene_families.py +++ b/sequentia/datasets/gene_families.py @@ -9,6 +9,7 @@ import importlib.resources import operator +import typing as t import numpy as np import pydantic as pyd @@ -23,7 +24,16 @@ @pyd.validate_call def load_gene_families( - *, families: set[pyd.conint(ge=0, le=6)] = {0, 1, 2, 3, 4, 5, 6} + *, + families: set[t.Annotated[int, pyd.Field(ge=0, le=6)]] = { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + }, ) -> tuple[SequentialDataset, LabelEncoder]: """Load a dataset of human DNA sequences grouped by gene family. diff --git a/sequentia/enums.py b/sequentia/enums.py index bdcbf22..3b70919 100644 --- a/sequentia/enums.py +++ b/sequentia/enums.py @@ -7,7 +7,7 @@ import enum -__all__ = ["TopologyMode", "CovarianceMode", "TransitionMode", "PriorMode"] +__all__ = ["CovarianceMode", "PriorMode", "TopologyMode", "TransitionMode"] class TopologyMode(enum.StrEnum): diff --git a/sequentia/model_selection/__init__.py b/sequentia/model_selection/__init__.py index 52dec87..07e3c40 100644 --- a/sequentia/model_selection/__init__.py +++ b/sequentia/model_selection/__init__.py @@ -24,15 +24,15 @@ ) __all__ = [ - "KFold", - "StratifiedKFold", - "ShuffleSplit", - "StratifiedShuffleSplit", - "RepeatedKFold", - "RepeatedStratifiedKFold", "GridSearchCV", - "RandomizedSearchCV", "HalvingGridSearchCV", "HalvingRandomSearchCV", + "KFold", + "RandomizedSearchCV", + "RepeatedKFold", + "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedKFold", + "StratifiedShuffleSplit", "param_grid", ] diff --git a/sequentia/model_selection/_split.py b/sequentia/model_selection/_split.py index e6bdbbf..d710f2f 100644 --- a/sequentia/model_selection/_split.py +++ b/sequentia/model_selection/_split.py @@ -49,11 +49,11 @@ __all__ = [ "KFold", - "StratifiedKFold", - "ShuffleSplit", - "StratifiedShuffleSplit", "RepeatedKFold", "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedKFold", + "StratifiedShuffleSplit", ] diff --git a/sequentia/model_selection/_validation.py b/sequentia/model_selection/_validation.py index 5365b85..6cd0674 100644 --- a/sequentia/model_selection/_validation.py +++ b/sequentia/model_selection/_validation.py @@ -45,7 +45,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import numbers import time from traceback import format_exc diff --git a/sequentia/models/base.py b/sequentia/models/base.py index 08c792c..55874be 100644 --- a/sequentia/models/base.py +++ b/sequentia/models/base.py @@ -8,6 +8,7 @@ from __future__ import annotations import abc +import typing as t import numpy as np import sklearn.base @@ -28,18 +29,18 @@ class ClassifierMixin( @abc.abstractmethod def fit( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, lengths: IntArray | None = None, - ) -> ClassifierMixin: + ) -> t.Self: """Fit the classifier with the provided sequences and outputs.""" raise NotImplementedError @abc.abstractmethod def predict( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -48,7 +49,7 @@ def predict( raise NotImplementedError def fit_predict( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, @@ -59,8 +60,6 @@ def fit_predict( Parameters ---------- - self: ClassifierMixin - X: Sequence(s). @@ -82,7 +81,7 @@ def fit_predict( @abc.abstractmethod def predict_proba( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -92,7 +91,7 @@ def predict_proba( @abc.abstractmethod def predict_scores( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -102,7 +101,7 @@ def predict_scores( @_validation.requires_fit def score( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, @@ -114,8 +113,6 @@ def score( Parameters ---------- - self: ClassifierMixin - X: Sequence(s). @@ -155,24 +152,24 @@ class RegressorMixin(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin): @abc.abstractmethod def fit( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, lengths: IntArray | None = None, - ) -> RegressorMixin: + ) -> t.Self: """Fit the regressor with the provided sequences and outputs.""" raise NotImplementedError @abc.abstractmethod def predict( - self: RegressorMixin, X: FloatArray, lengths: IntArray | None = None + self, X: FloatArray, lengths: IntArray | None = None ) -> FloatArray: """Predict outputs for the provided sequences.""" raise NotImplementedError def fit_predict( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, @@ -183,8 +180,6 @@ def fit_predict( Parameters ---------- - self: RegressorMixin - X: Sequence(s). @@ -206,7 +201,7 @@ def fit_predict( @_validation.requires_fit def score( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, @@ -218,8 +213,6 @@ def score( Parameters ---------- - self: RegressorMixin - X: Sequence(s). diff --git a/sequentia/models/hmm/classifier.py b/sequentia/models/hmm/classifier.py index a6bd06d..2da8a38 100644 --- a/sequentia/models/hmm/classifier.py +++ b/sequentia/models/hmm/classifier.py @@ -103,24 +103,22 @@ class HMMClassifier(ClassifierMixin): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, variant: type[variants.CategoricalHMM] | type[variants.GaussianMixtureHMM] | None = None, model_kwargs: dict[str, t.Any] | None = None, prior: ( - PriorMode | dict[int, pyd.confloat(ge=0, le=1)] - ) = PriorMode.UNIFORM, # placeholder + PriorMode | dict[int, t.Annotated[float, pyd.Field(ge=0, le=1)]] + ) = PriorMode.UNIFORM, classes: list[int] | None = None, n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1, - ) -> pyd.SkipValidation: + ) -> None: """Initialize a :class:`.HMMClassifier`. Parameters ---------- - self: HMMClassifier - variant: Variant of HMM to use for modelling each class. If not specified, models must instead be added using the :func:`add_model` or @@ -175,7 +173,9 @@ class labels provided here. #: Model parameters for initializing HMMs. self.model_kwargs: dict[str, t.Any] | None = model_kwargs #: Type of prior probability to assign to each HMM. - self.prior: PriorMode | dict[int, pyd.confloat(ge=0, le=1)] = prior + self.prior: ( + PriorMode | dict[int, t.Annotated[float, pyd.Field(ge=0, le=1)]] + ) = prior #: Set of possible class labels. self.classes: list[int] | None = classes #: Maximum number of concurrently running workers. @@ -196,19 +196,11 @@ class labels provided here. ) @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) - def add_model( - self: pyd.SkipValidation, - model: variants.BaseHMM, - /, - *, - label: int, - ) -> pyd.SkipValidation: + def add_model(self, model: variants.BaseHMM, /, *, label: int) -> t.Self: """Add a single HMM to the classifier. Parameters ---------- - self: HMMClassifier - model: HMM to add to the classifier. @@ -238,17 +230,11 @@ def add_model( return self @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) - def add_models( - self: pyd.SkipValidation, - models: dict[int, variants.BaseHMM], - /, - ) -> pyd.SkipValidation: + def add_models(self, models: dict[int, variants.BaseHMM], /) -> t.Self: """Add HMMs to the classifier. Parameters ---------- - self: HMMClassifier - models: HMMs to add to the classifier. The key for each HMM should be the label of the class represented by the HMM. @@ -268,12 +254,12 @@ def add_models( return self def fit( - self: HMMClassifier, + self, X: Array | None = None, y: IntArray | None = None, *, lengths: IntArray | None = None, - ) -> HMMClassifier: + ) -> t.Self: """Fit the HMMs to the sequence(s) in ``X``. - If fitted models were provided with :func:`add_model` or @@ -285,8 +271,6 @@ def fit( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -396,7 +380,7 @@ def fit( @_validation.requires_fit def predict( - self: HMMClassifier, + self, X: Array, *, lengths: IntArray | None = None, @@ -405,8 +389,6 @@ def predict( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -431,15 +413,13 @@ def predict( @_validation.requires_fit def predict_log_proba( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict log un-normalized posterior probabilities for the sequences in ``X``. Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -462,7 +442,7 @@ def predict_log_proba( @_validation.requires_fit def predict_proba( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict class probabilities for the sequence(s) in ``X``. @@ -471,8 +451,6 @@ def predict_proba( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -499,7 +477,7 @@ def predict_proba( @_validation.requires_fit def predict_scores( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict class scores for the sequence(s) in ``X``. @@ -508,8 +486,6 @@ def predict_scores( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -544,13 +520,11 @@ def predict_scores( ) @_validation.requires_fit - def save(self: HMMClassifier, path: str | pathlib.Path | t.IO, /) -> None: + def save(self, path: str | pathlib.Path | t.IO, /) -> None: """Serialize and save a fitted HMM classifier. Parameters ---------- - self: HMMClassifier - path: Location to save the serialized classifier. @@ -575,17 +549,11 @@ def save(self: HMMClassifier, path: str | pathlib.Path | t.IO, /) -> None: joblib.dump(state, path) @classmethod - def load( - cls: type[HMMClassifier], - path: str | pathlib.Path | t.IO, - /, - ) -> HMMClassifier: + def load(cls, path: str | pathlib.Path | t.IO, /) -> HMMClassifier: """Load and deserialize a fitted HMM classifier. Parameters ---------- - cls: type[HMMClassifier] - path: Location to load the serialized classifier from. @@ -613,7 +581,7 @@ def load( return model def _compute_scores_chunk( - self: HMMClassifier, X: Array, /, *, idxs: IntArray + self, X: Array, /, *, idxs: IntArray ) -> FloatArray: """Compute log posterior probabilities for a chunk of sequences.""" scores = np.zeros((len(idxs), len(self.classes_))) @@ -622,7 +590,7 @@ def _compute_scores_chunk( return scores def _compute_log_posterior( - self: HMMClassifier, + self, x: Array, /, ) -> FloatArray: diff --git a/sequentia/models/hmm/variants/base.py b/sequentia/models/hmm/variants/base.py index fbe15c5..3e0b864 100644 --- a/sequentia/models/hmm/variants/base.py +++ b/sequentia/models/hmm/variants/base.py @@ -34,13 +34,13 @@ class BaseHMM(BaseEstimator, metaclass=abc.ABCMeta): @abc.abstractmethod def __init__( - self: BaseHMM, + self, *, n_states: pyd.PositiveInt, topology: enums.TopologyMode | None, random_state: pyd.NonNegativeInt | np.random.RandomState | None, hmmlearn_kwargs: dict[str, t.Any] | None, - ) -> BaseHMM: + ) -> None: self.n_states: int = n_states """Number of states in the Markov chain.""" @@ -66,19 +66,12 @@ def __init__( self._skip_init_params = set() self._skip_params = set() - def fit( - self: BaseHMM, - X: Array, - *, - lengths: IntArray | None = None, - ) -> BaseHMM: + def fit(self, X: Array, *, lengths: IntArray | None = None) -> t.Self: """Fit the HMM to the sequences in ``X``, using the Baum—Welch algorithm. Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -123,14 +116,12 @@ def fit( return self @_validation.requires_fit - def score(self: BaseHMM, x: Array, /) -> float: + def score(self, x: Array, /) -> float: """Calculate the log-likelihood of the HMM generating a single observation sequence. Parameters ---------- - self: BaseHMM - x: Sequence. @@ -152,7 +143,7 @@ def score(self: BaseHMM, x: Array, /) -> float: @abc.abstractproperty @_validation.requires_fit - def n_params(self: BaseHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = 0 if "s" not in self._skip_params: @@ -163,7 +154,7 @@ def n_params(self: BaseHMM) -> int: @_validation.requires_fit def bic( - self: BaseHMM, + self, X: Array, *, lengths: IntArray | None = None, @@ -173,8 +164,6 @@ def bic( Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -200,7 +189,7 @@ def bic( @_validation.requires_fit def aic( - self: BaseHMM, + self, X: Array, *, lengths: IntArray | None = None, @@ -210,8 +199,6 @@ def aic( Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -236,10 +223,10 @@ def aic( @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def set_state_start_probs( - self: pyd.SkipValidation, + self, probs: ( FloatArray | enums.TransitionMode - ) = enums.TransitionMode.RANDOM, # placeholder + ) = enums.TransitionMode.RANDOM, /, ) -> None: """Set the initial state probabilities. @@ -258,8 +245,6 @@ def set_state_start_probs( Parameters ---------- - self: BaseHMM - probs: Probabilities or probability type to assign as initial state probabilities. @@ -285,10 +270,10 @@ def set_state_start_probs( @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def set_state_transition_probs( - self: pyd.SkipValidation, + self, probs: ( FloatArray | enums.TransitionMode - ) = enums.TransitionMode.RANDOM, # placeholder + ) = enums.TransitionMode.RANDOM, /, ) -> None: """Set the transition probability matrix. @@ -307,8 +292,6 @@ def set_state_transition_probs( Parameters ---------- - self: BaseHMM - probs: Probabilities or probability type to assign as state transition probabilities. @@ -335,7 +318,7 @@ def set_state_transition_probs( self._skip_init_params |= set("t") @abc.abstractmethod - def freeze(self: BaseHMM, params: str | None, /) -> None: + def freeze(self, params: str | None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from be updated during the Baum—Welch algorithm. """ @@ -343,19 +326,19 @@ def freeze(self: BaseHMM, params: str | None, /) -> None: self._skip_params |= set(self._modify_params(params or defaults)) @abc.abstractmethod - def unfreeze(self: BaseHMM, params: str | None, /) -> None: + def unfreeze(self, params: str | None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. """ defaults = self._hmmlearn_kwargs_defaults()["params"] self._skip_params -= set(self._modify_params(params or defaults)) - def _modify_params(self: BaseHMM, params: str) -> str: + def _modify_params(self, params: str) -> str: """Validate parameters to be frozen/unfrozen.""" defaults = self._hmmlearn_kwargs_defaults()["params"] msg = ( "Expected a string consisting of any combination of " - f"{defaults!r}" # + f"{defaults!r}" ) if isinstance(params, str): if bool(re.compile(rf"[^{defaults}]").search(params)): @@ -364,7 +347,7 @@ def _modify_params(self: BaseHMM, params: str) -> str: raise TypeError(msg) return params - def _check_init_params(self: BaseHMM) -> None: + def _check_init_params(self) -> None: """Validate hmmlearn init_params argument.""" topology = self.topology_ or _hmm.topologies.ErgodicTopology( n_states=self.n_states, @@ -401,7 +384,7 @@ def _check_init_params(self: BaseHMM) -> None: @classmethod def _check_hmmlearn_kwargs( - cls: type[BaseHMM], kwargs: dict[str, t.Any] | None + cls, kwargs: dict[str, t.Any] | None ) -> dict[str, t.Any]: """Check hmmlearn forwarded key-word arguments.""" defaults: dict[str, t.Any] = cls._hmmlearn_kwargs_defaults() @@ -456,7 +439,7 @@ def _check_hmmlearn_kwargs( return kwargs @abc.abstractmethod - def _init_hmm(self: BaseHMM, **kwargs: t.Any) -> hmmlearn.base.BaseHMM: + def _init_hmm(self, **kwargs: t.Any) -> hmmlearn.base.BaseHMM: """Initialize the hmmlearn model.""" raise NotImplementedError diff --git a/sequentia/models/hmm/variants/categorical.py b/sequentia/models/hmm/variants/categorical.py index 9f3ccf8..3c1ddc6 100644 --- a/sequentia/models/hmm/variants/categorical.py +++ b/sequentia/models/hmm/variants/categorical.py @@ -59,19 +59,17 @@ class CategoricalHMM(BaseHMM): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, n_states: pyd.PositiveInt = 5, topology: enums.TopologyMode | None = enums.TopologyMode.LEFT_RIGHT, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, hmmlearn_kwargs: dict[str, t.Any] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.CategoricalHMM`. Parameters ---------- - self: CategoricalHMM - n_states: Number of states in the Markov chain. @@ -104,18 +102,14 @@ def __init__( @property @_validation.requires_fit - def n_params(self: CategoricalHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = super().n_params if "e" not in self._skip_params: n_params += self.model.emissionprob_.size return n_params - def set_state_emission_probs( - self: CategoricalHMM, - probs: FloatArray, - /, - ) -> None: + def set_state_emission_probs(self, probs: FloatArray, /) -> None: """Set the state emission distribution of the HMM's emission model. If this method is **not** called, emission probabilities will be @@ -124,8 +118,6 @@ def set_state_emission_probs( Parameters ---------- - self: CategoricalHMM - probs: Array of emission probabilities. @@ -136,14 +128,12 @@ def set_state_emission_probs( self._emissionprob = np.array(probs, dtype=np.float64) self._skip_init_params |= set("e") - def freeze(self: CategoricalHMM, params: str | None = None, /) -> None: + def freeze(self, params: str | None = None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from being updated during the Baum—Welch algorithm. Parameters ---------- - self: CategoricalHMM - params: A string specifying which parameters to freeze. Can contain a combination of: @@ -164,14 +154,12 @@ def freeze(self: CategoricalHMM, params: str | None = None, /) -> None: """ super().freeze(params) - def unfreeze(self: CategoricalHMM, params: str | None = None, /) -> None: + def unfreeze(self, params: str | None = None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. Parameters ---------- - self: CategoricalHMM - params: A string specifying which parameters to unfreeze. Can contain a combination of: @@ -189,10 +177,7 @@ def unfreeze(self: CategoricalHMM, params: str | None = None, /) -> None: """ super().unfreeze(params) - def _init_hmm( - self: CategoricalHMM, - **kwargs: t.Any, - ) -> hmmlearn.hmm.CategoricalHMM: + def _init_hmm(self, **kwargs: t.Any) -> hmmlearn.hmm.CategoricalHMM: """Initialize the hmmlearn model.""" return hmmlearn.hmm.CategoricalHMM( n_components=self.n_states, diff --git a/sequentia/models/hmm/variants/gaussian_mixture.py b/sequentia/models/hmm/variants/gaussian_mixture.py index 9842153..58042a8 100644 --- a/sequentia/models/hmm/variants/gaussian_mixture.py +++ b/sequentia/models/hmm/variants/gaussian_mixture.py @@ -60,7 +60,7 @@ class GaussianMixtureHMM(BaseHMM): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, n_states: pyd.PositiveInt = 5, n_components: pyd.PositiveInt = 3, @@ -68,13 +68,11 @@ def __init__( topology: enums.TopologyMode | None = enums.TopologyMode.LEFT_RIGHT, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, hmmlearn_kwargs: dict[str, t.Any] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.GaussianMixtureHMM`. Parameters ---------- - self: GaussianMixtureHMM - n_states: Number of states in the Markov chain. @@ -122,7 +120,7 @@ def __init__( @property @_validation.requires_fit - def n_params(self: GaussianMixtureHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = super().n_params() if "m" not in self._skip_params: @@ -133,11 +131,7 @@ def n_params(self: GaussianMixtureHMM) -> int: n_params += self.model.weights_.size return n_params - def set_state_means( - self: GaussianMixtureHMM, - means: FloatArray, - /, - ) -> None: + def set_state_means(self, means: FloatArray, /) -> None: """Set the mean vectors of the state emission distributions. If this method is **not** called, mean vectors will be @@ -146,8 +140,6 @@ def set_state_means( Parameters ---------- - self: GaussianMixtureHMM - means: Array of mean values. @@ -158,11 +150,7 @@ def set_state_means( self._means = np.array(means, dtype=np.float64) self._skip_init_params |= set("m") - def set_state_covars( - self: GaussianMixtureHMM, - covars: FloatArray, - /, - ) -> None: + def set_state_covars(self, covars: FloatArray, /) -> None: """Set the covariance matrices of the state emission distributions. If this method is **not** called, covariance matrices will be @@ -171,8 +159,6 @@ def set_state_covars( Parameters ---------- - self: GaussianMixtureHMM - covars: Array of covariance values. @@ -183,11 +169,7 @@ def set_state_covars( self._covars = np.array(covars, dtype=np.float64) self._skip_init_params |= set("c") - def set_state_weights( - self: GaussianMixtureHMM, - weights: FloatArray, - /, - ) -> None: + def set_state_weights(self, weights: FloatArray, /) -> None: """Set the component mixture weights of the state emission distributions. @@ -197,8 +179,6 @@ def set_state_weights( Parameters ---------- - self: GaussianMixtureHMM - weights: Array of component mixture weights. @@ -209,18 +189,12 @@ def set_state_weights( self._weights = np.array(weights, dtype=np.float64) self._skip_init_params |= set("w") - def freeze( - self: GaussianMixtureHMM, - params: str | None = None, - /, - ) -> None: + def freeze(self, params: str | None = None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from be updated during the Baum—Welch algorithm. Parameters ---------- - self: GaussianMixtureHMM - params: A string specifying which parameters to freeze. Can contain a combination of: @@ -239,18 +213,12 @@ def freeze( """ super().freeze(params) - def unfreeze( - self: GaussianMixtureHMM, - params: str | None = None, - /, - ) -> None: + def unfreeze(self, params: str | None = None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. Parameters ---------- - self: GaussianMixtureHMM - params: A string specifying which parameters to unfreeze. Can contain a combination of: @@ -270,7 +238,7 @@ def unfreeze( super().unfreeze(params) def _init_hmm( - self: GaussianMixtureHMM, + self, **kwargs: t.Any, ) -> hmmlearn.hmm.GMMHMM: """Initialize the hmmlearn model.""" diff --git a/sequentia/models/knn/base.py b/sequentia/models/knn/base.py index b1b34ce..0d25325 100644 --- a/sequentia/models/knn/base.py +++ b/sequentia/models/knn/base.py @@ -30,7 +30,7 @@ class KNNMixin: @_validation.requires_fit def query_neighbors( - self: KNNMixin, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -41,8 +41,6 @@ def query_neighbors( Parameters ---------- - self: KNNMixin - X: Sequence(s). @@ -94,7 +92,7 @@ def query_neighbors( @_validation.requires_fit def compute_distance_matrix( - self: KNNMixin, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -104,8 +102,6 @@ def compute_distance_matrix( Parameters ---------- - self: KNNMixin - X: Sequence(s). @@ -152,13 +148,11 @@ def compute_distance_matrix( ) @_validation.requires_fit - def dtw(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def dtw(self, A: FloatArray, B: FloatArray) -> float: """Calculate the DTW distance between two observation sequences. Parameters ---------- - self: KNNMixin - A: The first sequence. @@ -179,7 +173,7 @@ def dtw(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: return self._dtw()(A, B) def _dtw1d( - self: KNNMixin, + self, a: FloatArray, b: FloatArray, *, @@ -193,11 +187,11 @@ def _dtw1d( window=window, ) - def _window(self: KNNMixin, A: FloatArray, B: FloatArray) -> int: + def _window(self, A: FloatArray, B: FloatArray) -> int: """Calculate the absolute DTW window size.""" return int(self.window * min(len(A), len(B))) - def _dtwi(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def _dtwi(self, A: FloatArray, B: FloatArray) -> float: """Compute the multivariate DTW distance as the sum of the pairwise per-feature DTW distances, allowing each feature to be warped independently. @@ -210,7 +204,7 @@ def dtw(a: FloatArray, b: FloatArray) -> float: return np.sum([dtw(A[:, i], B[:, i]) for i in range(A.shape[1])]) - def _dtwd(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def _dtwd(self, A: FloatArray, B: FloatArray) -> float: """Compute the multivariate DTW distance so that the warping of the features depends on each other, by modifying the local distance measure. @@ -223,18 +217,18 @@ def _dtwd(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: window=window, ) - def _dtw(self: KNNMixin) -> t.Callable[[FloatArray], float]: + def _dtw(self) -> t.Callable[[FloatArray], float]: """Conditional DTW callable.""" return self._dtwi if self.independent else self._dtwd - def _weighting(self: KNNMixin) -> t.Callable[[FloatArray], FloatArray]: + def _weighting(self) -> t.Callable[[FloatArray], FloatArray]: """Weighting function - use equal weighting if not provided.""" if callable(self.weighting): return self.weighting return np.ones_like def _distance_matrix_row_chunk( - self: KNNMixin, + self, row_idxs: IntArray, col_chunk_idxs: list[IntArray], X: FloatArray, @@ -254,7 +248,7 @@ def _distance_matrix_row_chunk( ) def _distance_matrix_row_col_chunk( - self: KNNMixin, + self, col_idxs: IntArray, row_idxs: IntArray, X: FloatArray, @@ -270,17 +264,11 @@ def _distance_matrix_row_col_chunk( return distances @_validation.requires_fit - def save( - self: KNNMixin, - path: str | pathlib.Path | t.IO, - /, - ) -> None: + def save(self, path: str | pathlib.Path | t.IO, /) -> None: """Serialize and save a fitted KNN estimator. Parameters ---------- - self: KNNMixin - path: Location to save the serialized estimator. @@ -312,17 +300,11 @@ def save( joblib.dump(state, path) @classmethod - def load( - cls: type[KNNMixin], - path: str | pathlib.Path | t.IO, - /, - ) -> KNNMixin: + def load(cls, path: str | pathlib.Path | t.IO, /) -> KNNMixin: """Load and deserialize a fitted KNN estimator. Parameters ---------- - cls: type[KNNMixin] - path: Location to load the serialized estimator from. diff --git a/sequentia/models/knn/classifier.py b/sequentia/models/knn/classifier.py index 32483e9..76b5240 100644 --- a/sequentia/models/knn/classifier.py +++ b/sequentia/models/knn/classifier.py @@ -59,23 +59,21 @@ class KNNClassifier(KNNMixin, ClassifierMixin): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, k: pyd.PositiveInt = 1, weighting: t.Callable[[FloatArray], FloatArray] | None = None, - window: pyd.confloat(ge=0.0, le=1.0) = 1.0, + window: t.Annotated[float, pyd.Field(ge=0, le=1)] = 1.0, independent: bool = False, use_c: bool = True, n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, classes: list[int] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.KNNClassifier`. Parameters ---------- - self: KNNClassifier - k: Number of neighbors. @@ -142,9 +140,7 @@ def __init__( self.k: int = k """Number of neighbors.""" - self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = ( - weighting # placeholder - ) + self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = weighting """A callable that specifies how distance weighting should be performed.""" @@ -184,18 +180,16 @@ def __init__( ) def fit( - self: KNNClassifier, + self, X: FloatArray, y: IntArray, *, lengths: IntArray | None = None, - ) -> KNNClassifier: + ) -> t.Self: """Fit the classifier to the sequence(s) in ``X``. Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -233,7 +227,7 @@ def fit( @_validation.requires_fit def predict( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -242,8 +236,6 @@ def predict( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -267,7 +259,7 @@ def predict( @_validation.requires_fit def predict_log_proba( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -278,8 +270,6 @@ def predict_log_proba( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -302,7 +292,7 @@ def predict_log_proba( @_validation.requires_fit def predict_proba( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -313,8 +303,6 @@ def predict_proba( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -338,7 +326,7 @@ def predict_proba( @_validation.requires_fit def predict_scores( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -350,8 +338,6 @@ def predict_scores( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -379,7 +365,7 @@ def predict_scores( return self._compute_scores(k_labels, k_weightings) def _compute_scores( - self: KNNClassifier, labels: IntArray, weightings: FloatArray + self, labels: IntArray, weightings: FloatArray ) -> FloatArray: """Calculate the sum of the weightings for each label group.""" scores = np.zeros((len(labels), len(self.classes_))) @@ -388,7 +374,7 @@ def _compute_scores( return scores def _find_max_labels( - self: KNNClassifier, + self, scores: FloatArray, /, ) -> IntArray: @@ -404,9 +390,7 @@ def _find_max_labels( ) ) - def _find_max_labels_chunk( - self: KNNClassifier, score_chunk: FloatArray, / - ) -> IntArray: + def _find_max_labels_chunk(self, score_chunk: FloatArray, /) -> IntArray: """Return the label with the highest score for each item in the chunk. """ diff --git a/sequentia/models/knn/regressor.py b/sequentia/models/knn/regressor.py index d1f36d6..8e9e83c 100644 --- a/sequentia/models/knn/regressor.py +++ b/sequentia/models/knn/regressor.py @@ -33,22 +33,20 @@ class KNNRegressor(KNNMixin, RegressorMixin): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, k: pyd.PositiveInt = 1, weighting: t.Callable[[FloatArray], FloatArray] | None = None, - window: pyd.confloat(ge=0.0, le=1.0) = 1.0, + window: t.Annotated[float, pyd.Field(ge=0, le=1)] = 1.0, independent: bool = False, use_c: bool = True, n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.KNNRegressor`. Parameters ---------- - self: KNNRegressor - k: Number of neighbors. @@ -107,9 +105,7 @@ def __init__( self.k: int = k """Number of neighbors.""" - self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = ( - weighting # placeholder - ) + self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = weighting """A callable that specifies how distance weighting should be performed.""" @@ -138,18 +134,16 @@ def __init__( self.set_score_request(lengths=True, sample_weight=True) def fit( - self: KNNRegressor, + self, X: FloatArray, y: FloatArray, *, lengths: IntArray | None = None, - ) -> KNNRegressor: + ) -> t.Self: """Fits the regressor to the sequence(s) in ``X``. Parameters ---------- - self: KNNRegressor - X: Sequence(s). @@ -183,7 +177,7 @@ def fit( @_validation.requires_fit def predict( - self: KNNRegressor, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -192,8 +186,6 @@ def predict( Parameters ---------- - self: KNNRegressor - X: Sequence(s). diff --git a/sequentia/preprocessing/transforms.py b/sequentia/preprocessing/transforms.py index ccb8dbc..52c7509 100644 --- a/sequentia/preprocessing/transforms.py +++ b/sequentia/preprocessing/transforms.py @@ -45,6 +45,7 @@ from __future__ import annotations +import typing as t import warnings import numpy as np @@ -176,18 +177,16 @@ def _check_inverse_transform(self, X, *, lengths): @sklearn.base._fit_context(prefer_skip_nested_validation=True) def fit( - self: IndependentFunctionTransformer, + self, X: Array, y: Array | None = None, *, lengths: IntArray | None = None, - ) -> IndependentFunctionTransformer: + ) -> t.Self: """Fits the transformer to ``X``. Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -213,7 +212,7 @@ def fit( return self def transform( - self: IndependentFunctionTransformer, + self, X: Array, *, lengths: IntArray | None = None, @@ -223,8 +222,6 @@ def transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -245,7 +242,7 @@ def transform( ) def inverse_transform( - self: IndependentFunctionTransformer, + self, X: Array, *, lengths: IntArray | None = None, @@ -254,8 +251,6 @@ def inverse_transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -283,7 +278,7 @@ def inverse_transform( ) def fit_transform( - self: IndependentFunctionTransformer, + self, X: Array, y: Array | None = None, *, @@ -294,8 +289,6 @@ def fit_transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). diff --git a/sequentia/version.py b/sequentia/version.py index 1482320..91e71f8 100644 --- a/sequentia/version.py +++ b/sequentia/version.py @@ -79,6 +79,6 @@ def version_info() -> str: "related packages": ", ".join(related_packages), } return "\n".join( - "{:>30} {}".format(k + ":", str(v).replace("\n", " ")) # + "{:>30} {}".format(k + ":", str(v).replace("\n", " ")) for k, v in info.items() ) diff --git a/tests/conftest.py b/tests/conftest.py index cb11fa2..a007cf6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ def combinations(string: str, /) -> t.Iterable[str]: return map( # noqa: C417 lambda params: "".join(params), itertools.chain.from_iterable( - itertools.combinations(string, i) # placeholder + itertools.combinations(string, i) for i in range(1, len(string)) ), ) @@ -39,17 +39,12 @@ def assert_not_equal(a: Array, b: Array, /) -> None: assert not np.allclose(a, b, rtol=1e-3) @classmethod - def assert_all_equal(cls: type[Helpers], A: Array, B: Array, /) -> None: + def assert_all_equal(cls, A: Array, B: Array, /) -> None: for a, b in zip(A, B): cls.assert_equal(a, b) @classmethod - def assert_all_not_equal( - cls: type[Helpers], - A: Array, - B: Array, - /, - ) -> None: + def assert_all_not_equal(cls, A: Array, B: Array, /) -> None: for a, b in zip(A, B): cls.assert_not_equal(a, b) @@ -61,6 +56,6 @@ def assert_distribution(x: Array, /) -> None: assert_almost_equal(x.sum(axis=1), np.ones(len(x))) -@pytest.fixture() +@pytest.fixture def helpers() -> type[Helpers]: return Helpers diff --git a/tests/unit/test_internal/test_hmm/test_topologies.py b/tests/unit/test_internal/test_hmm/test_topologies.py index 10adc0c..cea4f22 100644 --- a/tests/unit/test_internal/test_hmm/test_topologies.py +++ b/tests/unit/test_internal/test_hmm/test_topologies.py @@ -328,7 +328,7 @@ def test_ergodic_random_transitions_many( def test_ergodic_check_transitions_invalid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate an invalid ergodic transition matrix""" topology = topologies.ErgodicTopology( @@ -342,7 +342,7 @@ def test_ergodic_check_transitions_invalid( def test_ergodic_check_transitions_valid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate a valid ergodic transition matrix""" topology = topologies.ErgodicTopology( @@ -451,7 +451,7 @@ def test_linear_random_transitions_many( def test_linear_check_transitions_invalid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate an invalid linear transition matrix""" topology = topologies.LinearTopology(n_states=5, random_state=random_state) @@ -466,7 +466,7 @@ def test_linear_check_transitions_invalid( def test_linear_check_transitions_valid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate a valid linear transition matrix""" topology = topologies.LinearTopology(n_states=5, random_state=random_state)