Skip to content

Commit 121862c

Browse files
committed
DOC add test for Geometric SMOTE (#881)
1 parent 47ee77a commit 121862c

File tree

1 file changed

+209
-0
lines changed

1 file changed

+209
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
"""
2+
Test the geometric_smote module.
3+
"""
4+
5+
from collections import Counter
6+
7+
import pytest
8+
import numpy as np
9+
from numpy.linalg import norm
10+
from sklearn.utils import check_random_state
11+
from sklearn.datasets import make_classification
12+
13+
from ..geometric import _make_geometric_sample, GeometricSMOTE, SELECTION_STRATEGY
14+
15+
# Seed used for the shared random state and for the synthetic datasets.
RND_SEED = 0
RANDOM_STATE = check_random_state(RND_SEED)

# The draws below consume RANDOM_STATE sequentially (all centers first, then
# all surface points), so their order must not change. Each entry is a
# (scale, number of dimensions) pair.
CENTERS = [
    scale * RANDOM_STATE.random_sample((n_dims,))
    for scale, n_dims in ((1.0, 2), (2.6, 4), (3.2, 10), (-0.5, 1))
]
SURFACE_POINTS = [
    scale * RANDOM_STATE.random_sample((n_dims,))
    for scale, n_dims in ((1.0, 2), (5.2, 4), (-3.5, 10), (-10.9, 1))
]

# Parameter grids iterated over by the parametrized tests.
TRUNCATION_FACTORS = [-1.0, -0.5, 0.0, 0.5, 1.0]
DEFORMATION_FACTORS = [0.0, 0.25, 0.5, 0.75, 1.0]
31+
32+
33+
@pytest.mark.parametrize(
    'center,surface_point',
    list(zip(CENTERS, SURFACE_POINTS)),
)
def test_make_geometric_sample_hypersphere(center, surface_point):
    """Test the generation of points inside a hypersphere.

    With truncation and deformation factors both set to 0.0, the generated
    sample must be strictly closer to ``center`` than ``surface_point`` is.
    """
    sample = _make_geometric_sample(center, surface_point, 0.0, 0.0, RANDOM_STATE)
    distance = norm(sample - center)
    radius = norm(surface_point - center)
    np.testing.assert_array_less(0.0, radius - distance)
48+
49+
50+
@pytest.mark.parametrize(
    'surface_point,deformation_factor',
    [
        (np.array([1.0, 0.0]), 0.0),
        (2.6 * np.array([0.0, 1.0]), 0.25),
        (3.2 * np.array([0.0, 1.0, 0.0, 0.0]), 0.50),
        (0.5 * np.array([0.0, 0.0, 1.0]), 0.75),
        (6.7 * np.array([0.0, 0.0, 1.0, 0.0, 0.0]), 1.0),
    ],
)
def test_make_geometric_sample_half_hypersphere(surface_point, deformation_factor):
    """Test the generation of points inside a half-hypersphere.

    The center is the origin and the truncation factor is 1.0. The sample
    must be strictly closer to the origin than ``surface_point`` and have a
    strictly positive dot product with it.
    """
    origin = np.zeros_like(surface_point)
    sample = _make_geometric_sample(
        origin, surface_point, 1.0, deformation_factor, RANDOM_STATE
    )
    # Strictly inside the sphere whose radius is the surface point's norm.
    margin = norm(surface_point) - norm(sample)
    np.testing.assert_array_less(0.0, margin)
    # On the same side of the origin as the surface point.
    projection = np.dot(sample, surface_point)
    np.testing.assert_array_less(0.0, projection)
68+
69+
70+
@pytest.mark.parametrize(
    'center,surface_point,truncation_factor',
    [
        (center, surface_point, truncation_factor)
        for center, surface_point in zip(CENTERS, SURFACE_POINTS)
        for truncation_factor in TRUNCATION_FACTORS
    ],
)
def test_make_geometric_sample_line_segment(center, surface_point, truncation_factor):
    """Test the generation of points on a line segment.

    With a deformation factor of 1.0 the sample must be collinear with
    ``center`` and ``surface_point`` and strictly closer to ``center`` than
    ``surface_point`` is, for every truncation factor in the grid.
    """
    point = _make_geometric_sample(
        center, surface_point, truncation_factor, 1.0, RANDOM_STATE
    )
    rel_point = point - center
    rel_surface_point = surface_point - center

    # The sample must be strictly closer to the center than the surface point.
    np.testing.assert_array_less(0.0, norm(rel_surface_point) - norm(rel_point))

    # Collinearity check: |cos(angle)| must equal 1. Taking the absolute value
    # makes the check independent of which side of the center the sample
    # falls on, so no sign adjustment based on the truncation factor is
    # needed (the previous sign flip was cancelled by this same ``np.abs``).
    dot_product = np.dot(rel_point, rel_surface_point)
    norms_product = norm(rel_point) * norm(rel_surface_point)
    np.testing.assert_allclose(np.abs(dot_product) / norms_product, 1.0)
92+
93+
94+
def test_gsmote_default_init():
    """Test the initialization with default parameters."""
    gsmote = GeometricSMOTE()
    expected_defaults = {
        'sampling_strategy': 'auto',
        'truncation_factor': 1.0,
        'deformation_factor': 0.0,
        'selection_strategy': 'combined',
        'k_neighbors': 5,
        'n_jobs': 1,
    }
    # Identity check kept separate: the default random state must be None.
    assert gsmote.random_state is None
    for attribute, expected in expected_defaults.items():
        assert getattr(gsmote, attribute) == expected, attribute
104+
105+
106+
def test_gsmote_fit():
    """Test fit method.

    Checks ``sampling_strategy_`` on the object returned by ``fit`` for a
    200-sample dataset generated with class weights 0.6 / 0.4.
    """
    X, y = make_classification(
        n_samples=200, weights=[0.6, 0.4], random_state=RND_SEED
    )
    oversampler = GeometricSMOTE(random_state=RANDOM_STATE)
    fitted = oversampler.fit(X, y)
    assert fitted.sampling_strategy_ == {1: 40}
114+
115+
116+
def test_gsmote_invalid_selection_strategy():
    """Test invalid selection strategy.

    Resampling with ``selection_strategy='Minority'`` must raise
    ``ValueError``.
    """
    X, y = make_classification(
        n_samples=200, weights=[0.6, 0.4], random_state=RND_SEED
    )
    oversampler = GeometricSMOTE(
        selection_strategy='Minority', random_state=RANDOM_STATE
    )
    with pytest.raises(ValueError):
        oversampler.fit_resample(X, y)
125+
126+
127+
@pytest.mark.parametrize('selection_strategy', ['combined', 'minority', 'majority'])
def test_gsmote_nn(selection_strategy):
    """Test nearest neighbors object.

    After resampling, the neighbors objects relevant to the selection
    strategy must expose the expected ``n_neighbors`` values.
    """
    X, y = make_classification(
        n_samples=200, weights=[0.6, 0.4], random_state=RND_SEED
    )
    gsmote = GeometricSMOTE(
        selection_strategy=selection_strategy, random_state=RANDOM_STATE
    )
    gsmote.fit_resample(X, y)

    checks_positive = selection_strategy in ('minority', 'combined')
    checks_negative = selection_strategy in ('majority', 'combined')
    if checks_positive:
        # One more neighbor than requested through ``k_neighbors``.
        assert gsmote.nns_pos_.n_neighbors == gsmote.k_neighbors + 1
    if checks_negative:
        assert gsmote.nn_neg_.n_neighbors == 1
142+
143+
144+
@pytest.mark.parametrize(
    'selection_strategy, truncation_factor, deformation_factor',
    [
        (selection_strategy, truncation_factor, deformation_factor)
        for selection_strategy in SELECTION_STRATEGY
        for truncation_factor in TRUNCATION_FACTORS
        for deformation_factor in DEFORMATION_FACTORS
    ],
)
def test_gsmote_fit_resample_binary(
    selection_strategy, truncation_factor, deformation_factor
):
    """Test fit and sample for binary class case.

    The data are 17 points ``(v, v)`` with ``v`` running from 0.0 to 8.0 in
    steps of 0.5; the first 12 are labelled 0 and the last 5 are labelled 1.
    """
    n_maj, n_min = 12, 5
    step, min_coor, max_coor = 0.5, 0.0, 8.5

    coordinates = np.arange(min_coor, max_coor, step)
    X = np.repeat(coordinates, 2).reshape(-1, 2)
    y = np.concatenate([np.repeat(0, n_maj), np.repeat(1, n_min)])
    radius = step * np.sqrt(0.5)

    k_neighbors = 1
    gsmote = GeometricSMOTE(
        'auto',
        RANDOM_STATE,
        truncation_factor,
        deformation_factor,
        selection_strategy,
        k_neighbors,
    )
    X_resampled, y_resampled = gsmote.fit_resample(X, y)

    # Class 1 must be oversampled up to the size of class 0.
    assert gsmote.sampling_strategy_ == {1: (n_maj - n_min)}
    assert y_resampled.sum() == n_maj

    # Row ``n_maj + n_min`` is the first one past the original data
    # (presumably the first generated sample -- confirm against the sampler).
    # It must lie above the last class-0 point shifted down by ``radius``.
    lower_bound = X[n_maj - 1] - radius
    np.testing.assert_array_less(lower_bound, X_resampled[n_maj + n_min])
174+
175+
176+
@pytest.mark.parametrize(
    'selection_strategy, truncation_factor, deformation_factor',
    [
        (selection_strategy, truncation_factor, deformation_factor)
        for selection_strategy in SELECTION_STRATEGY
        for truncation_factor in TRUNCATION_FACTORS
        for deformation_factor in DEFORMATION_FACTORS
    ],
)
def test_gsmote_fit_resample_multiclass(
    selection_strategy, truncation_factor, deformation_factor
):
    """Test fit and sample for multiclass case.

    On a three-class dataset, label 0 must not be scheduled for
    oversampling, the set of labels must be unchanged, and every class must
    end up with the same number of samples.
    """
    X, y = make_classification(
        n_samples=100,
        n_classes=3,
        n_informative=5,
        weights=[0.75, 0.15, 0.10],
        random_state=RND_SEED,
    )
    majority_label = 0
    k_neighbors = 1
    gsmote = GeometricSMOTE(
        'auto',
        RANDOM_STATE,
        truncation_factor,
        deformation_factor,
        selection_strategy,
        k_neighbors,
    )
    _, y_resampled = gsmote.fit_resample(X, y)

    assert majority_label not in gsmote.sampling_strategy_
    np.testing.assert_array_equal(np.unique(y), np.unique(y_resampled))

    # A single distinct count means all classes are balanced.
    class_counts = Counter(y_resampled)
    assert len(set(class_counts.values())) == 1

0 commit comments

Comments
 (0)