Skip to content

Commit 49a5692

Browse files
authored
[MRG] Address issue #113 - Create toy example for testing (#118)
* Create toy and remove npy for RUS * Create toy and remove npy for ROS * Change the testing for CC * Add test for ENN * Add toy testing for CNN * Make toy testing for IHT * Add toy testing for NM * Add toy testing for OSS * Add toy testing for TL * Toy testing for SMOTE * Toy testing for ADASYN * Toy testing for SMOTEENN * Toy testing for SMOTETomek * Toy testing for BC * Toy testing for EE * Toy testing for RENN * Toy testing for AllKNN * Toy testing for NCL * Remove the unnecessary files * Change the kernel of SVM to be linear * Improve the testing of IHT * Python 3 compatibility dict and some smells correction * Fix the ratio type issue
1 parent 1660c23 commit 49a5692

File tree

154 files changed

+1819
-615
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

154 files changed

+1819
-615
lines changed

.coveragerc

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ exclude_lines =
1717
raise NotImplementedError
1818
if 0:
1919
if __name__ == .__main__.:
20-
if self.verbose:
20+
if self.verbose:
21+
show_missing = True

examples/datasets/plot_make_imbalance.py

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99

1010
print(__doc__)
1111

12-
import numpy as np
13-
1412
import matplotlib.pyplot as plt
1513
import seaborn as sns
1614
sns.set()
-1.36 MB
Binary file not shown.
Binary file not shown.
-69.9 KB
Binary file not shown.
Binary file not shown.
-1.37 MB
Binary file not shown.
Binary file not shown.
-70.4 KB
Binary file not shown.
Binary file not shown.

imblearn/combine/tests/test_smote_enn.py

+58-17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from numpy.testing import assert_raises
88
from numpy.testing import assert_equal
99
from numpy.testing import assert_array_equal
10+
from numpy.testing import assert_array_almost_equal
1011
from numpy.testing import assert_warns
1112

1213
from sklearn.datasets import make_classification
@@ -16,10 +17,27 @@
1617

1718
# Generate a global dataset to use
1819
RND_SEED = 0
19-
X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
20-
n_informative=3, n_redundant=1, flip_y=0,
21-
n_features=20, n_clusters_per_class=1,
22-
n_samples=5000, random_state=RND_SEED)
20+
X = np.array([[0.11622591, -0.0317206],
21+
[0.77481731, 0.60935141],
22+
[1.25192108, -0.22367336],
23+
[0.53366841, -0.30312976],
24+
[1.52091956, -0.49283504],
25+
[-0.28162401, -2.10400981],
26+
[0.83680821, 1.72827342],
27+
[0.3084254, 0.33299982],
28+
[0.70472253, -0.73309052],
29+
[0.28893132, -0.38761769],
30+
[1.15514042, 0.0129463],
31+
[0.88407872, 0.35454207],
32+
[1.31301027, -0.92648734],
33+
[-1.11515198, -0.93689695],
34+
[-0.18410027, -0.45194484],
35+
[0.9281014, 0.53085498],
36+
[-0.14374509, 0.27370049],
37+
[-0.41635887, -0.38299653],
38+
[0.08711622, 0.93259929],
39+
[1.70580611, -0.11219234]])
40+
Y = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0])
2341

2442

2543
def test_senn_sk_estimator():
@@ -74,8 +92,8 @@ def test_smote_fit():
7492
# Check if the data information have been computed
7593
assert_equal(smote.min_c_, 0)
7694
assert_equal(smote.maj_c_, 1)
77-
assert_equal(smote.stats_c_[0], 500)
78-
assert_equal(smote.stats_c_[1], 4500)
95+
assert_equal(smote.stats_c_[0], 8)
96+
assert_equal(smote.stats_c_[1], 12)
7997

8098

8199
def test_smote_sample_wt_fit():
@@ -97,28 +115,51 @@ def test_sample_regular():
97115

98116
X_resampled, y_resampled = smote.fit_sample(X, Y)
99117

100-
currdir = os.path.dirname(os.path.abspath(__file__))
101-
X_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_x.npy'))
102-
y_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_y.npy'))
103-
assert_array_equal(X_resampled, X_gt)
118+
X_gt = np.array([[0.11622591, -0.0317206],
119+
[1.25192108, -0.22367336],
120+
[0.53366841, -0.30312976],
121+
[1.52091956, -0.49283504],
122+
[0.88407872, 0.35454207],
123+
[1.31301027, -0.92648734],
124+
[-0.41635887, -0.38299653],
125+
[1.70580611, -0.11219234],
126+
[0.29307743, -0.14670439],
127+
[0.84976473, -0.15570176],
128+
[0.61319159, -0.11571668],
129+
[0.66052536, -0.28246517],
130+
[-0.28162401, -2.10400981],
131+
[0.83680821, 1.72827342],
132+
[0.08711622, 0.93259929]])
133+
y_gt = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
134+
assert_array_almost_equal(X_resampled, X_gt)
104135
assert_array_equal(y_resampled, y_gt)
105136

106137

107138
def test_sample_regular_half():
108139
"""Test sample function with regular SMOTE and a ratio of 0.5."""
109140

110141
# Create the object
111-
ratio = 0.5
142+
ratio = 0.8
112143
smote = SMOTEENN(ratio=ratio, random_state=RND_SEED)
113144
# Fit the data
114145
smote.fit(X, Y)
115146

116147
X_resampled, y_resampled = smote.fit_sample(X, Y)
117148

118-
currdir = os.path.dirname(os.path.abspath(__file__))
119-
X_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_x_05.npy'))
120-
y_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_y_05.npy'))
121-
assert_array_equal(X_resampled, X_gt)
149+
X_gt = np.array([[0.11622591, -0.0317206],
150+
[1.25192108, -0.22367336],
151+
[0.53366841, -0.30312976],
152+
[1.52091956, -0.49283504],
153+
[0.88407872, 0.35454207],
154+
[1.31301027, -0.92648734],
155+
[-0.41635887, -0.38299653],
156+
[1.70580611, -0.11219234],
157+
[0.36784496, -0.1953161],
158+
[-0.28162401, -2.10400981],
159+
[0.83680821, 1.72827342],
160+
[0.08711622, 0.93259929]])
161+
y_gt = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
162+
assert_array_almost_equal(X_resampled, X_gt)
122163
assert_array_equal(y_resampled, y_gt)
123164

124165

@@ -138,11 +179,11 @@ def test_senn_multiclass_error():
138179
type. """
139180

140181
# continuous case
141-
y = np.linspace(0, 1, 5000)
182+
y = np.linspace(0, 1, 20)
142183
sm = SMOTEENN(random_state=RND_SEED)
143184
assert_warns(UserWarning, sm.fit, X, y)
144185

145186
# multiclass case
146-
y = np.array([0] * 2000 + [1] * 2000 + [2] * 1000)
187+
y = np.array([0] * 3 + [1] * 2 + [2] * 15)
147188
sm = SMOTEENN(random_state=RND_SEED)
148189
assert_warns(UserWarning, sm.fit, X, y)

imblearn/combine/tests/test_smote_tomek.py

+69-17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from numpy.testing import assert_raises
88
from numpy.testing import assert_equal
99
from numpy.testing import assert_array_equal
10+
from numpy.testing import assert_array_almost_equal
1011
from numpy.testing import assert_warns
1112

1213
from sklearn.datasets import make_classification
@@ -16,10 +17,27 @@
1617

1718
# Generate a global dataset to use
1819
RND_SEED = 0
19-
X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
20-
n_informative=3, n_redundant=1, flip_y=0,
21-
n_features=20, n_clusters_per_class=1,
22-
n_samples=5000, random_state=RND_SEED)
20+
X = np.array([[0.20622591, 0.0582794],
21+
[0.68481731, 0.51935141],
22+
[1.34192108, -0.13367336],
23+
[0.62366841, -0.21312976],
24+
[1.61091956, -0.40283504],
25+
[-0.37162401, -2.19400981],
26+
[0.74680821, 1.63827342],
27+
[0.2184254, 0.24299982],
28+
[0.61472253, -0.82309052],
29+
[0.19893132, -0.47761769],
30+
[1.06514042, -0.0770537],
31+
[0.97407872, 0.44454207],
32+
[1.40301027, -0.83648734],
33+
[-1.20515198, -1.02689695],
34+
[-0.27410027, -0.54194484],
35+
[0.8381014, 0.44085498],
36+
[-0.23374509, 0.18370049],
37+
[-0.32635887, -0.29299653],
38+
[-0.00288378, 0.84259929],
39+
[1.79580611, -0.02219234]])
40+
Y = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0])
2341

2442

2543
def test_smote_sk_estimator():
@@ -74,8 +92,8 @@ def test_smote_fit():
7492
# Check if the data information have been computed
7593
assert_equal(smote.min_c_, 0)
7694
assert_equal(smote.maj_c_, 1)
77-
assert_equal(smote.stats_c_[0], 500)
78-
assert_equal(smote.stats_c_[1], 4500)
95+
assert_equal(smote.stats_c_[0], 8)
96+
assert_equal(smote.stats_c_[1], 12)
7997

8098

8199
def test_smote_sample_wt_fit():
@@ -97,28 +115,62 @@ def test_sample_regular():
97115

98116
X_resampled, y_resampled = smote.fit_sample(X, Y)
99117

100-
currdir = os.path.dirname(os.path.abspath(__file__))
101-
X_gt = np.load(os.path.join(currdir, 'data', 'smote_tomek_reg_x.npy'))
102-
y_gt = np.load(os.path.join(currdir, 'data', 'smote_tomek_reg_y.npy'))
103-
assert_array_equal(X_resampled, X_gt)
118+
X_gt = np.array([[0.20622591, 0.0582794],
119+
[0.68481731, 0.51935141],
120+
[1.34192108, -0.13367336],
121+
[0.62366841, -0.21312976],
122+
[1.61091956, -0.40283504],
123+
[-0.37162401, -2.19400981],
124+
[0.74680821, 1.63827342],
125+
[0.61472253, -0.82309052],
126+
[0.19893132, -0.47761769],
127+
[0.97407872, 0.44454207],
128+
[1.40301027, -0.83648734],
129+
[-1.20515198, -1.02689695],
130+
[-0.23374509, 0.18370049],
131+
[-0.32635887, -0.29299653],
132+
[-0.00288378, 0.84259929],
133+
[1.79580611, -0.02219234],
134+
[0.38307743, -0.05670439],
135+
[0.93976473, -0.06570176],
136+
[0.70319159, -0.02571668],
137+
[0.75052536, -0.19246517]])
138+
y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
139+
0])
140+
assert_array_almost_equal(X_resampled, X_gt)
104141
assert_array_equal(y_resampled, y_gt)
105142

106143

107144
def test_sample_regular_half():
108145
"""Test sample function with regular SMOTE and a ratio of 0.5."""
109146

110147
# Create the object
111-
ratio = 0.5
148+
ratio = 0.8
112149
smote = SMOTETomek(ratio=ratio, random_state=RND_SEED)
113150
# Fit the data
114151
smote.fit(X, Y)
115152

116153
X_resampled, y_resampled = smote.fit_sample(X, Y)
117154

118-
currdir = os.path.dirname(os.path.abspath(__file__))
119-
X_gt = np.load(os.path.join(currdir, 'data', 'smote_tomek_reg_x_05.npy'))
120-
y_gt = np.load(os.path.join(currdir, 'data', 'smote_tomek_reg_y_05.npy'))
121-
assert_array_equal(X_resampled, X_gt)
155+
X_gt = np.array([[0.20622591, 0.0582794],
156+
[0.68481731, 0.51935141],
157+
[1.34192108, -0.13367336],
158+
[0.62366841, -0.21312976],
159+
[1.61091956, -0.40283504],
160+
[-0.37162401, -2.19400981],
161+
[0.74680821, 1.63827342],
162+
[0.61472253, -0.82309052],
163+
[0.19893132, -0.47761769],
164+
[0.97407872, 0.44454207],
165+
[1.40301027, -0.83648734],
166+
[-1.20515198, -1.02689695],
167+
[-0.23374509, 0.18370049],
168+
[-0.32635887, -0.29299653],
169+
[-0.00288378, 0.84259929],
170+
[1.79580611, -0.02219234],
171+
[0.45784496, -0.1053161]])
172+
y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0])
173+
assert_array_almost_equal(X_resampled, X_gt)
122174
assert_array_equal(y_resampled, y_gt)
123175

124176

@@ -138,11 +190,11 @@ def test_multiclass_error():
138190
type. """
139191

140192
# continuous case
141-
y = np.linspace(0, 1, 5000)
193+
y = np.linspace(0, 1, 20)
142194
sm = SMOTETomek(random_state=RND_SEED)
143195
assert_warns(UserWarning, sm.fit, X, y)
144196

145197
# multiclass case
146-
y = np.array([0] * 2000 + [1] * 2000 + [2] * 1000)
198+
y = np.array([0] * 3 + [1] * 2 + [2] * 15)
147199
sm = SMOTETomek(random_state=RND_SEED)
148200
assert_warns(UserWarning, sm.fit, X, y)

imblearn/datasets/imbalance.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,13 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
4949
The corresponding label of `X_resampled`
5050
5151
"""
52-
if ratio <= 0.0 or ratio >= 1.0:
53-
raise ValueError('ratio value must be such that 0.0 < ratio < 1.0')
52+
if isinstance(ratio, float):
53+
if ratio > 1:
54+
raise ValueError('Ration cannot be greater than one.')
55+
elif ratio <= 0:
56+
raise ValueError('Ratio cannot be negative.')
57+
else:
58+
raise ValueError('Ratio must be a float between 0.0 < ratio < 1.0')
5459

5560
X, y = check_X_y(X, y)
5661

-75.4 KB
Binary file not shown.
-74.9 KB
Binary file not shown.
-75 KB
Binary file not shown.
-74.9 KB
Binary file not shown.
-51.3 KB
Binary file not shown.
-75.1 KB
Binary file not shown.
-74.8 KB
Binary file not shown.

imblearn/ensemble/tests/data/bc_x.npy

-1.46 MB
Binary file not shown.
-1.07 MB
Binary file not shown.
-1.45 MB
Binary file not shown.
-1.45 MB
Binary file not shown.
-1.45 MB
Binary file not shown.
-1020 KB
Binary file not shown.
-1.46 MB
Binary file not shown.
-1.45 MB
Binary file not shown.

imblearn/ensemble/tests/data/bc_y.npy

-75.4 KB
Binary file not shown.
-55.3 KB
Binary file not shown.
-74.9 KB
Binary file not shown.
-75 KB
Binary file not shown.
-74.9 KB
Binary file not shown.
-51.3 KB
Binary file not shown.
-75.1 KB
Binary file not shown.
-74.8 KB
Binary file not shown.
-78.2 KB
Binary file not shown.

imblearn/ensemble/tests/data/ee_x.npy

-1.53 MB
Binary file not shown.
-2.29 MB
Binary file not shown.

imblearn/ensemble/tests/data/ee_y.npy

-78.2 KB
Binary file not shown.
-117 KB
Binary file not shown.

0 commit comments

Comments
 (0)