Skip to content

Commit f407976

Browse files
authored
MNT Compatibility with sklearn 1.0 (#864)
1 parent edf6eae commit f407976

30 files changed

+252
-133
lines changed

.pep8speaks.yml

-10
This file was deleted.

.pre-commit-config.yaml

+5-8
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
11
repos:
2-
- repo: https://github.com/python/black
3-
rev: stable
4-
hooks:
5-
- id: black
62
- repo: https://github.com/pre-commit/pre-commit-hooks
73
rev: v2.3.0
84
hooks:
95
- id: check-yaml
106
- id: end-of-file-fixer
117
- id: trailing-whitespace
8+
- repo: https://github.com/psf/black
9+
rev: 21.6b0
10+
hooks:
11+
- id: black
1212
- repo: https://gitlab.com/pycqa/flake8
13-
rev: 3.7.8
13+
rev: 3.9.2
1414
hooks:
1515
- id: flake8
1616
types: [file, python]
17-
# only check for unused imports for now, as long as
18-
# the code is not fully PEP8 compatible
19-
args: [--select=F401]

azure-pipelines.yml

+19-12
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,26 @@ jobs:
33
- job: linting
44
displayName: Linting
55
pool:
6-
vmImage: ubuntu-18.04
6+
vmImage: ubuntu-20.04
77
steps:
8-
- bash: echo "##vso[task.prependpath]$CONDA/bin"
9-
displayName: Add conda to PATH
10-
- bash: sudo chown -R $USER $CONDA
11-
displayName: Take ownership of conda installation
12-
- bash: conda create --name flake8_env --yes flake8
13-
displayName: Install flake8
8+
- task: UsePythonVersion@0
9+
inputs:
10+
versionSpec: '3.9'
11+
- bash: |
12+
# Include pytest compatibility with mypy
13+
pip install pytest flake8 mypy==0.782 black==21.6b0
14+
displayName: Install linters
15+
- bash: |
16+
black --check .
17+
displayName: Run black
1418
- bash: |
15-
source activate flake8_env
1619
./build_tools/circle/linting.sh
1720
displayName: Run linting
1821
1922
- template: build_tools/azure/posix.yml
2023
parameters:
2124
name: Linux_Runs
22-
vmImage: ubuntu-18.04
25+
vmImage: ubuntu-20.04
2326
matrix:
2427
pylatest_pip_openblas_pandas:
2528
DISTRIB: 'conda-pip-latest'
@@ -33,15 +36,14 @@ jobs:
3336
- template: build_tools/azure/posix.yml
3437
parameters:
3538
name: Linux
36-
vmImage: ubuntu-18.04
39+
vmImage: ubuntu-20.04
3740
dependsOn: [linting]
3841
matrix:
3942
# Linux environment to test that scikit-learn can be built against
4043
# versions of numpy, scipy with ATLAS that come with the Ubuntu image
4144
# (numpy 1.13.3 and scipy 0.19 on Bionic 18.04; re-check for the 20.04 image)
4245
py36_ubuntu_atlas:
4346
DISTRIB: 'ubuntu'
44-
PYTHON_VERSION: '3.6'
4547
JOBLIB_VERSION: '*'
4648
pylatest_conda_pandas_keras:
4749
DISTRIB: 'conda'
@@ -61,11 +63,16 @@ jobs:
6163
TENSORFLOW_VERSION: '*'
6264
COVERAGE: 'true'
6365
TEST_DOCSTRINGS: 'true'
66+
sklearn_0_24_conda:
67+
DISTRIB: 'conda'
68+
PYTHON_VERSION: '3.7'
69+
SKLEARN_VERSION: '0.24.2'
70+
INSTALL_MKL: 'true'
6471

6572
- template: build_tools/azure/posix-32.yml
6673
parameters:
6774
name: Linux32
68-
vmImage: ubuntu-18.04
75+
vmImage: ubuntu-20.04
6976
dependsOn: [linting]
7077
matrix:
7178
py36_ubuntu_atlas_32bit:

build_tools/azure/install.sh

+6-1
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,15 @@ if [[ "$DISTRIB" == "conda" ]]; then
3232
fi
3333

3434
make_conda $TO_INSTALL
35-
python -m pip install scikit-learn
3635

3736
TO_INSTALL=""
3837

38+
if [[ -n "$SKLEARN_VERSION" ]]; then
39+
TO_INSTALL="$TO_INSTALL scikit-learn=$SKLEARN_VERSION"
40+
else
41+
TO_INSTALL="$TO_INSTALL scikit-learn"
42+
fi
43+
3944
if [[ -n "$PANDAS_VERSION" ]]; then
4045
TO_INSTALL="$TO_INSTALL pandas=$PANDAS_VERSION"
4146
fi

doc/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ The imbalanced-learn package requires the following dependencies:
1212
* python (>=3.6)
1313
* numpy (>=1.13.3)
1414
* scipy (>=0.19.1)
15-
* scikit-learn (>=0.23)
15+
* scikit-learn (>=0.24)
1616
* keras 2 (optional)
1717
* tensorflow (optional)
1818

doc/sphinxext/sphinx_issues.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ class IssueRole(object):
8080
EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$")
8181

8282
def __init__(
83-
self, uri_config_option, format_kwarg, github_uri_template, format_text=None,
83+
self,
84+
uri_config_option,
85+
format_kwarg,
86+
github_uri_template,
87+
format_text=None,
8488
):
8589
self.uri_config_option = uri_config_option
8690
self.format_kwarg = format_kwarg
@@ -103,7 +107,9 @@ def make_node(self, name, issue_no, config, options=None):
103107
)
104108
path = name_map.get(name)
105109
ref = "https://github.com/{issues_github_path}/{path}/{n}".format(
106-
issues_github_path="{}/{}".format(username, repo), path=path, n=issue,
110+
issues_github_path="{}/{}".format(username, repo),
111+
path=path,
112+
n=issue,
107113
)
108114
formatted_issue = self.format_text(issue).lstrip("#")
109115
text = "{username}/{repo}{symbol}{formatted_issue}".format(**locals())

doc/whats_new/v0.8.rst

+13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
.. _changes_0_8:
22

3+
Version 0.8.1
4+
=============
5+
6+
**In development**
7+
8+
Changelog
9+
10+
Maintenance
11+
...........
12+
13+
- Make `imbalanced-learn` compatible with `scikit-learn` 1.0.
14+
:pr:`864` by :user:`Guillaume Lemaitre <glemaitre>`.
15+
316
Version 0.8.0
417
=============
518

imblearn/base.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ def fit_resample(self, X, y):
8282

8383
output = self._fit_resample(X, y)
8484

85-
y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
85+
y_ = (
86+
label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
87+
)
8688

8789
X_, y_ = arrays_transformer.transform(output[0], y_)
8890
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
@@ -284,7 +286,11 @@ def fit_resample(self, X, y):
284286

285287
if self.validate:
286288

287-
y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
289+
y_ = (
290+
label_binarize(output[1], classes=np.unique(y))
291+
if binarize_y
292+
else output[1]
293+
)
288294
X_, y_ = arrays_transformer.transform(output[0], y_)
289295
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
290296

imblearn/ensemble/_forest.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -422,15 +422,13 @@ def fit(self, X, y, sample_weight=None):
422422
)
423423
if sample_weight is not None:
424424
sample_weight = _check_sample_weight(sample_weight, X)
425+
self._n_features = X.shape[1]
425426

426427
if issparse(X):
427428
# Pre-sort indices to avoid that each individual tree of the
428429
# ensemble sorts the indices.
429430
X.sort_indices()
430431

431-
# Remap output
432-
_, self.n_features_ = X.shape
433-
434432
y = np.atleast_1d(y)
435433
if y.ndim == 2 and y.shape[1] == 1:
436434
warn(
@@ -627,5 +625,13 @@ def _set_oob_score(self, X, y):
627625

628626
self.oob_score_ = oob_score / self.n_outputs_
629627

628+
@property
def n_features_(self):
    """Number of features when fitting the estimator.

    Returns ``n_features_in_`` when that attribute is present on the
    estimator, and otherwise falls back to the ``_n_features`` value
    stored on the instance by ``fit``.
    """
    # ``getattr`` takes the object, a *string* attribute name, and a default.
    # The previous call passed the value of ``n_features_in_`` as the object
    # and an integer as the name, which raises TypeError on every access.
    return getattr(self, "n_features_in_", self._n_features)
632+
630633
def _more_tags(self):
631-
return {"multioutput": False}
634+
return {
635+
"multioutput": False,
636+
"multilabel": False,
637+
}

imblearn/ensemble/tests/test_easy_ensemble.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@
4848
def test_easy_ensemble_classifier(n_estimators, base_estimator):
4949
# Check classification for various parameter settings.
5050
X, y = make_imbalance(
51-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
51+
iris.data,
52+
iris.target,
53+
sampling_strategy={0: 20, 1: 25, 2: 50},
54+
random_state=0,
5255
)
5356
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
5457

@@ -72,7 +75,10 @@ def test_easy_ensemble_classifier(n_estimators, base_estimator):
7275
def test_base_estimator():
7376
# Check base_estimator and its default values.
7477
X, y = make_imbalance(
75-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
78+
iris.data,
79+
iris.target,
80+
sampling_strategy={0: 20, 1: 25, 2: 50},
81+
random_state=0,
7682
)
7783
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
7884

@@ -91,7 +97,10 @@ def test_base_estimator():
9197

9298
def test_bagging_with_pipeline():
9399
X, y = make_imbalance(
94-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
100+
iris.data,
101+
iris.target,
102+
sampling_strategy={0: 20, 1: 25, 2: 50},
103+
random_state=0,
95104
)
96105
estimator = EasyEnsembleClassifier(
97106
n_estimators=2,
@@ -109,7 +118,9 @@ def test_warm_start(random_state=42):
109118
for n_estimators in [5, 10]:
110119
if clf_ws is None:
111120
clf_ws = EasyEnsembleClassifier(
112-
n_estimators=n_estimators, random_state=random_state, warm_start=True,
121+
n_estimators=n_estimators,
122+
random_state=random_state,
123+
warm_start=True,
113124
)
114125
else:
115126
clf_ws.set_params(n_estimators=n_estimators)
@@ -182,7 +193,10 @@ def test_warm_start_equivalence():
182193
)
183194
def test_easy_ensemble_classifier_error(n_estimators, msg_error):
184195
X, y = make_imbalance(
185-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
196+
iris.data,
197+
iris.target,
198+
sampling_strategy={0: 20, 1: 25, 2: 50},
199+
random_state=0,
186200
)
187201
with pytest.raises(ValueError, match=msg_error):
188202
eec = EasyEnsembleClassifier(n_estimators=n_estimators)
@@ -191,7 +205,10 @@ def test_easy_ensemble_classifier_error(n_estimators, msg_error):
191205

192206
def test_easy_ensemble_classifier_single_estimator():
193207
X, y = make_imbalance(
194-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
208+
iris.data,
209+
iris.target,
210+
sampling_strategy={0: 20, 1: 25, 2: 50},
211+
random_state=0,
195212
)
196213
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
197214

@@ -205,14 +222,19 @@ def test_easy_ensemble_classifier_single_estimator():
205222

206223
def test_easy_ensemble_classifier_grid_search():
207224
X, y = make_imbalance(
208-
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
225+
iris.data,
226+
iris.target,
227+
sampling_strategy={0: 20, 1: 25, 2: 50},
228+
random_state=0,
209229
)
210230

211231
parameters = {
212232
"n_estimators": [1, 2],
213233
"base_estimator__n_estimators": [3, 4],
214234
}
215235
grid_search = GridSearchCV(
216-
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5,
236+
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()),
237+
parameters,
238+
cv=5,
217239
)
218240
grid_search.fit(X, y)

imblearn/ensemble/tests/test_forest.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ def imbalanced_dataset():
3232
[
3333
({"n_estimators": "whatever"}, "n_estimators must be an integer"),
3434
({"n_estimators": -100}, "n_estimators must be greater than zero"),
35-
({"bootstrap": False, "oob_score": True}, "Out of bag estimation only",),
35+
(
36+
{"bootstrap": False, "oob_score": True},
37+
"Out of bag estimation only",
38+
),
3639
],
3740
)
3841
def test_balanced_random_forest_error(imbalanced_dataset, forest_params, err_msg):
@@ -105,7 +108,10 @@ def test_balanced_random_forest_oob(imbalanced_dataset):
105108
X, y, random_state=42, stratify=y
106109
)
107110
est = BalancedRandomForestClassifier(
108-
oob_score=True, random_state=0, n_estimators=1000, min_samples_leaf=2,
111+
oob_score=True,
112+
random_state=0,
113+
n_estimators=1000,
114+
min_samples_leaf=2,
109115
)
110116

111117
est.fit(X_train, y_train)
@@ -135,12 +141,16 @@ def test_little_tree_with_small_max_samples():
135141

136142
# First fit with no restriction on max samples
137143
est1 = BalancedRandomForestClassifier(
138-
n_estimators=1, random_state=rng, max_samples=None,
144+
n_estimators=1,
145+
random_state=rng,
146+
max_samples=None,
139147
)
140148

141149
# Second fit with max samples restricted to just 2
142150
est2 = BalancedRandomForestClassifier(
143-
n_estimators=1, random_state=rng, max_samples=2,
151+
n_estimators=1,
152+
random_state=rng,
153+
max_samples=2,
144154
)
145155

146156
est1.fit(X, y)

0 commit comments

Comments
 (0)