Skip to content

Commit 4c24526

Browse files
glemaitre authored and chkoar committed
Address #131 - Replace nonzero by flatnonzero whenever possible (#132)
* Remove nonzero occurrence in NCR * Remove nonzero occurrence in OSS * Remove nonzero occurrence in BC * Remove nonzero occurrence in CNN * Replace nonzero occurrence in TL * Remove nonzero occurrence in IHT * Remove nonzero occurrence in ENN
1 parent 6b5c9d9 commit 4c24526

11 files changed

+39
-31
lines changed

imblearn/datasets/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
The :mod:`imblearn.datasets` provides methods to generate
2+
The :mod:`imblearn.datasets` provides methods to generate
33
imbalanced data.
44
"""
55

imblearn/datasets/imbalance.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,3 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
8383
LOGGER.info('Make the dataset imbalanced: %s', Counter(y_resampled))
8484

8585
return X_resampled, y_resampled
86-

imblearn/datasets/tests/test_make_imbalance.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Y = np.zeros(1000)
1515
Y[500:] = 1
1616

17+
1718
def test_make_imbalance_bad_ratio():
1819
"""Test either if an error is raised with bad ratio
1920
argument"""
@@ -46,12 +47,14 @@ def test_make_imbalance_invalid_ratio():
4647
ratio = 0.5
4748
assert_raises(ValueError, make_imbalance, X, y_, ratio)
4849

50+
4951
def test_make_imbalance_single_class():
5052
"""Test either if an error when there is a single class"""
5153
y_ = np.zeros((X.shape[0], ))
5254
ratio = 0.5
5355
assert_raises(ValueError, make_imbalance, X, y_, ratio)
5456

57+
5558
def test_make_imbalance_1():
5659
"""Test make_imbalance"""
5760
X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1)
@@ -60,6 +63,7 @@ def test_make_imbalance_1():
6063
assert_equal(counter[1], 250)
6164
assert(np.all([X_i in X for X_i in X_]))
6265

66+
6367
def test_make_imbalance_2():
6468
"""Test make_imbalance"""
6569
X_, y_ = make_imbalance(X, Y, ratio=0.25, min_c_=1)
@@ -68,6 +72,7 @@ def test_make_imbalance_2():
6872
assert_equal(counter[1], 125)
6973
assert(np.all([X_i in X for X_i in X_]))
7074

75+
7176
def test_make_imbalance_3():
7277
"""Test make_imbalance"""
7378
X_, y_ = make_imbalance(X, Y, ratio=0.1, min_c_=1)
@@ -76,6 +81,7 @@ def test_make_imbalance_3():
7681
assert_equal(counter[1], 50)
7782
assert(np.all([X_i in X for X_i in X_]))
7883

84+
7985
def test_make_imbalance_4():
8086
"""Test make_imbalance"""
8187
X_, y_ = make_imbalance(X, Y, ratio=0.01, min_c_=1)
@@ -84,6 +90,7 @@ def test_make_imbalance_4():
8490
assert_equal(counter[1], 5)
8591
assert(np.all([X_i in X for X_i in X_]))
8692

93+
8794
def test_make_imbalance_5():
8895
"""Test make_imbalance"""
8996
X_, y_ = make_imbalance(X, Y, ratio=0.01, min_c_=0)
@@ -92,6 +99,7 @@ def test_make_imbalance_5():
9299
assert_equal(counter[0], 5)
93100
assert(np.all([X_i in X for X_i in X_]))
94101

102+
95103
def test_make_imbalance_multiclass():
96104
"""Test make_imbalance with multiclass data"""
97105
# Make y to be multiclass

imblearn/ensemble/balance_cascade.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def _sample(self, X, y):
186186
# Keep the indices of the minority class somewhere if we need to
187187
# return them later
188188
if self.return_indices:
189-
idx_min = np.nonzero(y == self.min_c_)[0]
189+
idx_min = np.flatnonzero(y == self.min_c_)
190190

191191
# Condition to initiliase before the search
192192
b_subset_search = True
@@ -208,7 +208,7 @@ def _sample(self, X, y):
208208
# Generate an appropriate number of index to extract
209209
# from the majority class depending of the false classification
210210
# rate of the previous iteration
211-
idx_sel_from_maj = random_state.choice(np.nonzero(b_sel_N)[0],
211+
idx_sel_from_maj = random_state.choice(np.flatnonzero(b_sel_N),
212212
size=num_samples,
213213
replace=False)
214214
idx_sel_from_maj = np.concatenate((idx_mis_class,
@@ -254,8 +254,8 @@ def _sample(self, X, y):
254254
# next round
255255

256256
# Find the misclassified index to keep them for the next round
257-
idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
258-
N_y[idx_sel_from_maj])]
257+
idx_mis_class = idx_sel_from_maj[np.flatnonzero(
258+
pred_label != N_y[idx_sel_from_maj])]
259259
self.logger.debug('Elements misclassified: %s', idx_mis_class)
260260

261261
# Count how many random element will be selected
@@ -275,7 +275,7 @@ def _sample(self, X, y):
275275
if n_subsets == (self.n_max_subset - 1):
276276
b_subset_search = False
277277
# Select the remaining data
278-
idx_sel_from_maj = np.nonzero(b_sel_N)[0]
278+
idx_sel_from_maj = np.flatnonzero(b_sel_N)
279279
idx_sel_from_maj = np.concatenate((idx_mis_class,
280280
idx_sel_from_maj),
281281
axis=0).astype(int)
@@ -305,7 +305,7 @@ def _sample(self, X, y):
305305
if num_samples > np.count_nonzero(b_sel_N):
306306
b_subset_search = False
307307
# Select the remaining data
308-
idx_sel_from_maj = np.nonzero(b_sel_N)[0]
308+
idx_sel_from_maj = np.flatnonzero(b_sel_N)
309309
idx_sel_from_maj = np.concatenate((idx_mis_class,
310310
idx_sel_from_maj),
311311
axis=0).astype(int)

imblearn/under_sampling/condensed_nearest_neighbour.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def _sample(self, X, y):
135135

136136
# If we need to offer support for the indices
137137
if self.return_indices:
138-
idx_under = np.nonzero(y == self.min_c_)[0]
138+
idx_under = np.flatnonzero(y == self.min_c_)
139139

140140
# Loop over the other classes under picking at random
141141
for key in self.stats_c_.keys():
@@ -198,7 +198,7 @@ def _sample(self, X, y):
198198
pred_S_y = knn.predict(S_x)
199199
good_classif_label = np.unique(
200200
np.append(idx_maj_sample,
201-
np.nonzero(pred_S_y == S_y)[0]))
201+
np.flatnonzero(pred_S_y == S_y)))
202202

203203
# Find the misclassified S_y
204204
sel_x = np.squeeze(S_x[idx_maj_sample, :])

imblearn/under_sampling/edited_nearest_neighbours.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _sample(self, X, y):
141141

142142
# If we need to offer support for the indices
143143
if self.return_indices:
144-
idx_under = np.nonzero(y == self.min_c_)[0]
144+
idx_under = np.flatnonzero(y == self.min_c_)
145145

146146
# Create a k-NN to fit the whole data
147147
nn_obj = NearestNeighbors(n_neighbors=self.size_ngh + 1,
@@ -179,12 +179,12 @@ def _sample(self, X, y):
179179
raise NotImplementedError
180180

181181
# Get the samples which agree all together
182-
sel_x = np.squeeze(sub_samples_x[np.nonzero(nnhood_bool), :])
183-
sel_y = sub_samples_y[np.nonzero(nnhood_bool)]
182+
sel_x = sub_samples_x[np.flatnonzero(nnhood_bool), :]
183+
sel_y = sub_samples_y[np.flatnonzero(nnhood_bool)]
184184

185185
# If we need to offer support for the indices selected
186186
if self.return_indices:
187-
idx_tmp = np.nonzero(y == key)[0][np.nonzero(nnhood_bool)]
187+
idx_tmp = np.flatnonzero(y == key)[np.flatnonzero(nnhood_bool)]
188188
idx_under = np.concatenate((idx_under, idx_tmp), axis=0)
189189

190190
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)

imblearn/under_sampling/instance_hardness_threshold.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def _sample(self, X, y):
210210

211211
# If we need to offer support for the indices
212212
if self.return_indices:
213-
idx_under = np.nonzero(mask)[0]
213+
idx_under = np.flatnonzero(mask)
214214
return X_resampled, y_resampled, idx_under
215215
else:
216216
return X_resampled, y_resampled

imblearn/under_sampling/nearmiss.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
183183
sel_idx = sorted_idx[:num_samples]
184184

185185
return (X[y == key][sel_idx], y[y == key][sel_idx],
186-
np.nonzero(y == key)[0][sel_idx])
186+
np.flatnonzero(y == key)[sel_idx])
187187

188188
def _sample(self, X, y):
189189
"""Resample the dataset.
@@ -212,9 +212,9 @@ def _sample(self, X, y):
212212

213213
# Assign the parameter of the element of this class
214214
# Check that the version asked is implemented
215-
if self.version not in [1,2,3]:
216-
raise ValueError("Parameter 'version' must be 1, 2 or 3, "
217-
"got {0}".format(self.version))
215+
if self.version not in [1, 2, 3]:
216+
raise ValueError('Parameter `version` must be 1, 2 or 3, got'
217+
' {}'.format(self.version))
218218

219219
# Start with the minority class
220220
X_min = X[y == self.min_c_]
@@ -232,7 +232,7 @@ def _sample(self, X, y):
232232

233233
# If we need to offer support for the indices
234234
if self.return_indices:
235-
idx_under = np.nonzero(y == self.min_c_)[0]
235+
idx_under = np.flatnonzero(y == self.min_c_)
236236

237237
# For each element of the current class, find the set of NN
238238
# of the minority class

imblearn/under_sampling/neighbourhood_cleaning_rule.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def _sample(self, X, y):
123123

124124
# If we need to offer support for the indices
125125
if self.return_indices:
126-
idx_under = np.nonzero(y == self.min_c_)[0]
126+
idx_under = np.flatnonzero(y == self.min_c_)
127127

128128
# Create a k-NN to fit the whole data
129129
nn_obj = NearestNeighbors(n_neighbors=self.size_ngh,
@@ -140,7 +140,7 @@ def _sample(self, X, y):
140140
sub_samples_x = X[y == key]
141141

142142
# Get the samples associated
143-
idx_sub_sample = np.nonzero(y == key)[0]
143+
idx_sub_sample = np.flatnonzero(y == key)
144144

145145
# Find the NN for the current class
146146
nnhood_idx = nn_obj.kneighbors(sub_samples_x,
@@ -157,7 +157,7 @@ def _sample(self, X, y):
157157
if key == self.min_c_:
158158
# Get the index to exclude
159159
idx_to_exclude += nnhood_idx[np.nonzero(
160-
nnhood_label[np.nonzero(nnhood_bool)])].tolist()
160+
nnhood_label[np.flatnonzero(nnhood_bool)])].tolist()
161161
else:
162162
# Get the index to exclude
163163
idx_to_exclude += idx_sub_sample[np.nonzero(
@@ -173,12 +173,12 @@ def _sample(self, X, y):
173173
sel_idx[y == self.min_c_] = 0
174174

175175
# Get the samples from the majority classes
176-
sel_x = np.squeeze(X[np.nonzero(sel_idx), :])
177-
sel_y = y[np.nonzero(sel_idx)]
176+
sel_x = X[np.flatnonzero(sel_idx), :]
177+
sel_y = y[np.flatnonzero(sel_idx)]
178178

179179
# If we need to offer support for the indices selected
180180
if self.return_indices:
181-
idx_tmp = np.nonzero(sel_idx)[0]
181+
idx_tmp = np.flatnonzero(sel_idx)
182182
idx_under = np.concatenate((idx_under, idx_tmp), axis=0)
183183

184184
X_resampled = np.concatenate((X_resampled, sel_x), axis=0)

imblearn/under_sampling/one_sided_selection.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def _sample(self, X, y):
134134

135135
# If we need to offer support for the indices
136136
if self.return_indices:
137-
idx_under = np.nonzero(y == self.min_c_)[0]
137+
idx_under = np.flatnonzero(y == self.min_c_)
138138

139139
# Loop over the other classes under picking at random
140140
for key in self.stats_c_.keys():
@@ -177,14 +177,15 @@ def _sample(self, X, y):
177177
pred_S_y = knn.predict(S_x)
178178

179179
# Find the misclassified S_y
180-
sel_x = np.squeeze(S_x[np.nonzero(pred_S_y != S_y), :])
181-
sel_y = S_y[np.nonzero(pred_S_y != S_y)]
180+
sel_x = S_x[np.flatnonzero(pred_S_y != S_y), :]
181+
sel_y = S_y[np.flatnonzero(pred_S_y != S_y)]
182182

183183
# If we need to offer support for the indices selected
184184
# We concatenate the misclassified samples with the seed and the
185185
# minority samples
186186
if self.return_indices:
187-
idx_tmp = np.nonzero(y == key)[0][np.nonzero(pred_S_y != S_y)]
187+
idx_tmp = np.flatnonzero(y == key)[
188+
np.flatnonzero(pred_S_y != S_y)]
188189
idx_under = np.concatenate((idx_under,
189190
idx_maj_sample,
190191
idx_tmp),

0 commit comments

Comments
 (0)