Skip to content

Commit 5594f8e

Browse files
authored
Merge pull request #211 from mandjevant/2025Update
Sklearn 1.5 & 1.6 support. Various improvements
2 parents 4bc2864 + 6797eb9 commit 5594f8e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+477
-218
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
strategy:
1111
matrix:
1212
os: [ubuntu-latest, windows-latest]
13-
python-version: ['3.9', '3.10', '3.11']
13+
python-version: ['3.11', '3.12', '3.13']
1414

1515
steps:
1616
- uses: actions/checkout@v2

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,4 @@ nosetests.xml
3636

3737
.idea
3838
notebooks/.ipynb_checkpoints
39+
/venv

hpsklearn/components/__init__.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@
147147
lightgbm_regression
148148

149149
from .feature_extraction import \
150-
tfidf
150+
tfidf_vectorizer, \
151+
hashing_vectorizer, \
152+
count_vectorizer
151153

152154
from .decomposition import pca
153155

@@ -253,7 +255,11 @@ def any_text_preprocessing(name):
253255
"""
254256
Generic pre-processing appropriate for text data
255257
"""
256-
return hp.choice(name, [[tfidf(name + '.tfidf')]])
258+
return hp.choice(name, [
259+
[tfidf_vectorizer(name + ".tfidf")],
260+
[hashing_vectorizer(name + ".hashing")],
261+
[count_vectorizer(name + ".count")],
262+
])
257263

258264

259265
# Legacy any pre-processing as proposed in #137
@@ -423,7 +429,9 @@ def all_preprocessing(name):
423429
[polynomial_features(name + ".polynomial_features")],
424430
[spline_transformer(name + ".spline_transformer")],
425431
[k_bins_discretizer(name + ".k_bins_discretizer")],
426-
[tfidf(name + ".tfidf")],
432+
[tfidf_vectorizer(name + ".tfidf")],
433+
[hashing_vectorizer(name + ".hashing")],
434+
[count_vectorizer(name + ".count")],
427435
[pca(name + ".pca")],
428436
[ts_lagselector(name + ".ts_lagselector")],
429437
[colkmeans(name + ".colkmeans")],

hpsklearn/components/cluster/_kmeans.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ def _kmeans_hp_space(
4444
n_clusters: typing.Union[int, Apply] = None,
4545
init: typing.Union[str, callable, npt.ArrayLike, Apply] = None,
4646
verbose: int = 0,
47-
random_state=None
47+
random_state=None,
48+
**kwargs
4849
):
4950
"""
5051
Hyper parameter search space for
@@ -55,7 +56,8 @@ def _kmeans_hp_space(
5556
n_clusters=_kmeans_n_clusters(name_func("n_clusters")) if n_clusters is None else n_clusters,
5657
init=_kmeans_init(name_func("init")) if init is None else init,
5758
verbose=verbose,
58-
random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state
59+
random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state,
60+
**kwargs
5961
)
6062
return hp_space
6163

hpsklearn/components/compose/_target.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def transformed_target_regressor(name: str,
1313
transformer: object = None,
1414
func: callable = None,
1515
inverse_func: callable = None,
16-
check_inverse: bool = True):
16+
check_inverse: bool = True,
17+
**kwargs):
1718
"""
1819
Return a pyll graph with hyperparameters that will construct
1920
a sklearn.compose.TransformedTargetRegressor model.
@@ -36,6 +37,7 @@ def _name(msg):
3637
transformer=transformer,
3738
func=func,
3839
inverse_func=inverse_func,
39-
check_inverse=check_inverse
40+
check_inverse=check_inverse,
41+
**kwargs
4042
)
4143
return scope.sklearn_TransformedTargetRegressor(**hp_space)

hpsklearn/components/covariance/_elliptic_envelope.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def elliptic_envelope(name: str,
1616
assume_centered: bool = False,
1717
support_fraction: typing.Union[float, Apply] = None,
1818
contamination: typing.Union[float, Apply] = 0.1,
19-
random_state=None):
19+
random_state=None,
20+
**kwargs):
2021
"""
2122
Return a pyll graph with hyperparameters that will construct
2223
a sklearn.covariance.EllipticEnvelope model.
@@ -39,6 +40,7 @@ def _name(msg):
3940
support_fraction=hp.uniform(_name("support_fraction"), 0.05, 0.95)
4041
if support_fraction is None else support_fraction,
4142
contamination=hp.uniform(_name("contamination"), 0.0, 0.3) if contamination is None else contamination,
42-
random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state
43+
random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
44+
**kwargs
4345
)
4446
return scope.sklearn_EllipticEnvelope(**hp_space)

hpsklearn/components/cross_decomposition/_pls.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ def _pls_hp_space(
5151
scale: bool = True,
5252
max_iter: typing.Union[int, Apply] = None,
5353
tol: typing.Union[float, Apply] = None,
54-
copy: bool = True
54+
copy: bool = True,
55+
**kwargs
5556
):
5657
"""
5758
Hyper parameter search space for
@@ -64,7 +65,8 @@ def _pls_hp_space(
6465
scale=scale,
6566
max_iter=_pls_max_iter(name_func("max_iter")) if max_iter is None else max_iter,
6667
tol=_pls_tol(name_func("tol")) if tol is None else tol,
67-
copy=copy
68+
copy=copy,
69+
**kwargs
6870
)
6971
return hp_space
7072

hpsklearn/components/discriminant_analysis.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def _discriminant_analysis_hp_space(
3030
name_func,
3131
priors: npt.ArrayLike = None,
3232
store_covariance: bool = False,
33-
tol: float = None
33+
tol: float = None,
34+
**kwargs
3435
):
3536
"""
3637
Common hyper parameter search space
@@ -40,7 +41,8 @@ def _discriminant_analysis_hp_space(
4041
hp_space = dict(
4142
priors=priors,
4243
store_covariance=store_covariance,
43-
tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol
44+
tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol,
45+
**kwargs
4446
)
4547
return hp_space
4648

hpsklearn/components/dummy.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def sklearn_DummyRegressor(*args, **kwargs):
2626
def dummy_classifier(name: str,
2727
strategy: typing.Union[str, Apply] = None,
2828
random_state=None,
29-
constant: typing.Union[int, str, npt.ArrayLike] = None):
29+
constant: typing.Union[int, str, npt.ArrayLike] = None,
30+
**kwargs):
3031
"""
3132
Return a pyll graph with hyperparameters that will construct
3233
a sklearn.dummy.DummyClassifier model.
@@ -45,7 +46,8 @@ def _name(msg):
4546
strategy=hp.choice(_name("strategy"), ["stratified", "most_frequent", "prior", "uniform"])
4647
if strategy is None else strategy,
4748
random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
48-
constant=constant
49+
constant=constant,
50+
**kwargs
4951
)
5052
return scope.sklearn_DummyClassifier(**hp_space)
5153

@@ -60,7 +62,8 @@ def _name(msg):
6062
def dummy_regressor(name: str,
6163
strategy: typing.Union[str, Apply] = None,
6264
constant: typing.Union[int, str, npt.ArrayLike] = None,
63-
quantile: float = None):
65+
quantile: float = None,
66+
**kwargs):
6467
"""
6568
Return a pyll graph with hyperparameters that will construct
6669
a sklearn.dummy.DummyRegressor model.
@@ -78,6 +81,7 @@ def _name(msg):
7881
hp_space = dict(
7982
strategy=hp.choice(_name("strategy"), ["mean", "median"]) if strategy is None else strategy,
8083
constant=constant,
81-
quantile=quantile
84+
quantile=quantile,
85+
**kwargs
8286
)
8387
return scope.sklearn_DummyRegressor(**hp_space)

hpsklearn/components/ensemble/_bagging.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def _bagging_hp_space(
9393
n_jobs: int = 1,
9494
random_state=None,
9595
verbose: int = False,
96+
**kwargs,
9697
):
9798
"""
9899
Hyper parameter search space for
@@ -112,6 +113,7 @@ def _bagging_hp_space(
112113
n_jobs=n_jobs,
113114
random_state=_bagging_random_state(name_func("random_state")) if random_state is None else random_state,
114115
verbose=verbose,
116+
**kwargs
115117
)
116118
return hp_space
117119

0 commit comments

Comments
 (0)