Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Issue 237) Draft updates for scikit-learn 1.6 and scipy 1.15 API changes #238

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions src/skmatter/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,13 @@
from scipy.sparse.linalg import eigsh
from sklearn.base import BaseEstimator, MetaEstimatorMixin
from sklearn.feature_selection._base import SelectorMixin
from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask
from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted
from sklearn.utils import check_random_state, safe_mask
from sklearn.utils.validation import (
FLOAT_DTYPES,
as_float_array,
check_is_fitted,
validate_data,
)

from .utils import (
X_orthogonalizer,
Expand Down Expand Up @@ -157,11 +162,6 @@ def __init__(
self.n_to_select = n_to_select
self.score_threshold = score_threshold
self.score_threshold_type = score_threshold_type
if self.score_threshold_type not in ["relative", "absolute"]:
raise ValueError(
"invalid score_threshold_type, expected one of 'relative' or 'absolute'"
)

self.full = full
self.progress_bar = progress_bar
self.random_state = random_state
Expand All @@ -184,6 +184,11 @@ def fit(self, X, y=None, warm_start=False):
-------
self : object
"""
if self.score_threshold_type not in ["relative", "absolute"]:
raise ValueError(
"invalid score_threshold_type, expected one of 'relative' or 'absolute'"
)

if self.selection_type == "feature":
self._axis = 1
elif self.selection_type == "sample":
Expand All @@ -205,7 +210,7 @@ def fit(self, X, y=None, warm_start=False):

if hasattr(self, "mixing") or y is not None:
X, y = self._validate_data(X, y, **params)
X, y = check_X_y(X, y, multi_output=True)
X, y = validate_data(self, X, y, multi_output=True)

if len(y.shape) == 1:
# force y to have multi_output 2D format even when it's 1D, since
Expand All @@ -214,7 +219,7 @@ def fit(self, X, y=None, warm_start=False):
y = y.reshape((len(y), 1))

else:
X = check_array(X, **params)
X = validate_data(self, X, **params)

if self.full and self.score_threshold is not None:
raise ValueError(
Expand Down Expand Up @@ -308,7 +313,7 @@ def transform(self, X, y=None):

mask = self.get_support()

X = check_array(X)
X = validate_data(self, X, reset=False)

if len(X.shape) == 1:
if self._axis == 0:
Expand Down Expand Up @@ -486,6 +491,11 @@ def _more_tags(self):
"requires_y": False,
}

def __sklearn_tags__(self):
    """Return the inherited estimator tags with the target marked as not required.

    Returns
    -------
    estimator_tags : object
        The tags object produced by the parent class, with
        ``target_tags.required`` set to ``False``.
    """
    estimator_tags = super().__sklearn_tags__()
    # Override only the single flag this class cares about; everything else
    # is kept as provided by the parent implementation.
    estimator_tags.target_tags.required = False
    return estimator_tags


class _CUR(GreedySelector):
"""Transformer that performs Greedy Selection by choosing features
Expand Down Expand Up @@ -560,6 +570,8 @@ def score(self, X, y=None):
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
X, y = validate_data(self, X, y, reset=False)

return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -734,6 +746,8 @@ def score(self, X, y=None):
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
X, y = validate_data(self, X, y, reset=False)

return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -927,6 +941,8 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
X, y = validate_data(self, X, y, reset=False)

return self.hausdorff_

def get_distance(self):
Expand Down Expand Up @@ -1048,11 +1064,6 @@ def __init__(
full=False,
random_state=0,
):
if mixing == 1.0:
raise ValueError(
"Mixing = 1.0 corresponds to traditional FPS."
"Please use the FPS class."
)

self.mixing = mixing
self.initialize = initialize
Expand All @@ -1067,6 +1078,16 @@ def __init__(
random_state=random_state,
)

def fit(self, X, y=None, warm_start=False):
    """Check ``mixing`` and then delegate the fit to the parent class.

    The ``mixing`` check lives here rather than in ``__init__`` so that the
    constructor only stores its parameters.

    Parameters
    ----------
    X : array-like
        Training data, passed through unchanged to the parent ``fit``.
    y : array-like, default=None
        Target values, passed through unchanged to the parent ``fit``.
    warm_start : bool, default=False
        Forwarded to the parent ``fit``.

    Returns
    -------
    self : object
        Whatever the parent ``fit`` returns.

    Raises
    ------
    ValueError
        If ``self.mixing`` equals ``1.0``.
    """
    if self.mixing == 1.0:
        # Adjacent string literals are concatenated verbatim, so the
        # separating space has to be part of the first literal.
        raise ValueError(
            "Mixing = 1.0 corresponds to traditional FPS. "
            "Please use the FPS class."
        )

    # Forward warm_start; previously it was accepted but silently dropped.
    return super().fit(X, y, warm_start=warm_start)

def score(self, X, y=None):
"""Returns the Hausdorff distances of all samples to previous selections.

Expand All @@ -1083,6 +1104,8 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
X, y = validate_data(self, X, y, reset=False)

return self.hausdorff_

def get_distance(self):
Expand Down Expand Up @@ -1159,3 +1182,8 @@ def _more_tags(self):
return {
"requires_y": True,
}

def __sklearn_tags__(self):
    """Return the inherited estimator tags with the target marked as required.

    Returns
    -------
    estimator_tags : object
        The tags object produced by the parent class, with
        ``target_tags.required`` set to ``True``.
    """
    estimator_tags = super().__sklearn_tags__()
    # Override only the single flag this class cares about; everything else
    # is kept as provided by the parent implementation.
    estimator_tags.target_tags.required = True
    return estimator_tags
18 changes: 9 additions & 9 deletions src/skmatter/decomposition/_kernel_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model._base import LinearModel
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils import check_array, check_random_state
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
from sklearn.utils.validation import check_is_fitted, check_X_y
from sklearn.utils.validation import check_is_fitted, validate_data

from ..preprocessing import KernelNormalizer
from ..utils import check_krr_fit, pcovr_kernel
Expand Down Expand Up @@ -270,7 +270,7 @@ def fit(self, X, Y, W=None):
):
raise ValueError("Regressor must be an instance of `KernelRidge`")

X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
self.X_fit_ = X.copy()

if self.n_components is None:
Expand Down Expand Up @@ -387,7 +387,7 @@ def predict(self, X=None):
"""Predicts the property values"""
check_is_fitted(self, ["pky_", "pty_"])

X = check_array(X)
X = validate_data(self, X, reset=False)
K = self._get_kernel(X, self.X_fit_)
if self.center:
K = self.centerer_.transform(K)
Expand All @@ -408,7 +408,7 @@ def transform(self, X):
"""
check_is_fitted(self, ["pkt_", "X_fit_"])

X = check_array(X)
X = validate_data(self, X, reset=False)
K = self._get_kernel(X, self.X_fit_)

if self.center:
Expand Down Expand Up @@ -440,7 +440,7 @@ def inverse_transform(self, T):
"""
return T @ self.ptx_

def score(self, X, Y):
def score(self, X, y):
r"""Computes the (negative) loss values for KernelPCovR on the given predictor
and response variables. The loss in :math:`\mathbf{K}`, as explained in
[Helfrecht2020]_ does not correspond to a traditional Gram loss
Expand Down Expand Up @@ -474,7 +474,7 @@ def score(self, X, Y):
"""
check_is_fitted(self, ["pkt_", "X_fit_"])

X = check_array(X)
X, y = validate_data(self, X, y, reset=False)

K_NN = self._get_kernel(self.X_fit_, self.X_fit_)
K_VN = self._get_kernel(X, self.X_fit_)
Expand All @@ -485,8 +485,8 @@ def score(self, X, Y):
K_VN = self.centerer_.transform(K_VN)
K_VV = self.centerer_.transform(K_VV)

y = K_VN @ self.pky_
Lkrr = np.linalg.norm(Y - y) ** 2 / np.linalg.norm(Y) ** 2
ypred = K_VN @ self.pky_
Lkrr = np.linalg.norm(y - ypred) ** 2 / np.linalg.norm(y) ** 2

t_n = K_NN @ self.pkt_
t_v = K_VN @ self.pkt_
Expand Down
22 changes: 12 additions & 10 deletions src/skmatter/decomposition/_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from sklearn.decomposition._pca import _infer_dimension
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.linear_model._base import LinearModel
from sklearn.utils import check_array, check_random_state
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
from sklearn.utils.validation import check_is_fitted, check_X_y
from sklearn.utils.validation import check_is_fitted, validate_data

from ..utils import check_lr_fit, pcovr_covariance, pcovr_kernel

Expand Down Expand Up @@ -221,7 +221,7 @@ def fit(self, X, Y, W=None):
Regression weights, optional when regressor=`precomputed`. If not
passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]`
"""
X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)

# saved for inverse transformations from the latent space,
# should be zero in the case that the features have been properly centered
Expand Down Expand Up @@ -582,10 +582,10 @@ def predict(self, X=None, T=None):
raise ValueError("Either X or T must be supplied.")

if X is not None:
X = check_array(X)
X = validate_data(self, X, reset=False)
return X @ self.pxy_
else:
T = check_array(T)
T = validate_data(self, T, reset=False)
return T @ self.pty_

def transform(self, X=None):
Expand All @@ -604,7 +604,7 @@ def transform(self, X=None):

return super().transform(X)

def score(self, X, Y, T=None):
def score(self, X, y, T=None):
r"""Return the (negative) total reconstruction error for X and Y,
defined as:

Expand Down Expand Up @@ -635,13 +635,15 @@ def score(self, X, Y, T=None):
Negative sum of the loss in reconstructing X from the latent-space
projection T and the loss in predicting Y from the latent-space projection T
"""
X, y = validate_data(self, X, y, reset=False)

if T is None:
T = self.transform(X)

x = self.inverse_transform(T)
y = self.predict(T=T)
Xrec = self.inverse_transform(T)
ypred = self.predict(T=T)

return -(
np.linalg.norm(X - x) ** 2.0 / np.linalg.norm(X) ** 2.0
+ np.linalg.norm(Y - y) ** 2.0 / np.linalg.norm(Y) ** 2.0
np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0
+ np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0
)
14 changes: 9 additions & 5 deletions src/skmatter/linear_model/_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
from sklearn.metrics import check_scoring
from sklearn.model_selection import KFold, check_cv
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_is_fitted, validate_data


class Ridge2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
class Ridge2FoldCV(RegressorMixin, MultiOutputMixin, BaseEstimator):
r"""Ridge regression with an efficient 2-fold cross-validation method using the SVD
solver.

Expand All @@ -20,7 +19,7 @@ class Ridge2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
while the alpha value is determined with a 2-fold cross-validation from a list of
alpha values. It is a more efficient version than doing 2-fold cross-validation
naively. The algorithmic trick is to reuse the matrices obtained by SVD for each
regularization paramater :param alpha: The 2-fold CV can be broken donw to
regularization parameter :param alpha: The 2-fold CV can be broken down to

.. math::

Expand Down Expand Up @@ -136,6 +135,11 @@ def __init__(
self.shuffle = shuffle
self.n_jobs = n_jobs

def __sklearn_tags__(self):
    """Return the inherited estimator tags with single-output disabled.

    Returns
    -------
    estimator_tags : object
        The tags object produced by the parent class, with
        ``target_tags.single_output`` set to ``False``.
    """
    estimator_tags = super().__sklearn_tags__()
    # Only the single_output flag is overridden; all other tags are inherited.
    estimator_tags.target_tags.single_output = False
    return estimator_tags

def _more_tags(self):
return {"multioutput_only": True}

Expand Down Expand Up @@ -195,7 +199,7 @@ def predict(self, X):
Training data, where n_samples is the number of samples
and n_features is the number of features.
"""
X = check_array(X)
X = validate_data(self, X, reset=False)

check_is_fitted(self, ["coef_"])

Expand Down
2 changes: 1 addition & 1 deletion src/skmatter/sample_selection/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
from scipy.interpolate import LinearNDInterpolator, interp1d
from scipy.interpolate.interpnd import _ndim_coords_from_arrays
from scipy.interpolate._interpnd import _ndim_coords_from_arrays
from scipy.spatial import ConvexHull
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

Expand Down
Loading