Address #131 - Replace nonzero by flatnonzero whenever possible (#132)

glemaitre · chkoar · commit 4c2452673550 · 2016-08-30T14:55:05.000+03:00
* Remove nonzero occurrences in NCR
* Remove nonzero occurrences in OSS
* Remove nonzero occurrences in BC
* Remove nonzero occurrences in CNN
* Replace nonzero occurrences in TL
* Remove nonzero occurrences in IHT
* Remove nonzero occurrences in ENN
diff --git a/imblearn/datasets/__init__.py b/imblearn/datasets/__init__.py
@@ -1,5 +1,5 @@
 """
-The :mod:`imblearn.datasets` provides methods to generate 
+The :mod:`imblearn.datasets` provides methods to generate
 imbalanced data.
 """
 
diff --git a/imblearn/datasets/imbalance.py b/imblearn/datasets/imbalance.py
@@ -83,4 +83,3 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
     LOGGER.info('Make the dataset imbalanced: %s', Counter(y_resampled))
 
     return X_resampled, y_resampled
-
diff --git a/imblearn/datasets/tests/test_make_imbalance.py b/imblearn/datasets/tests/test_make_imbalance.py
@@ -14,6 +14,7 @@
 Y = np.zeros(1000)
 Y[500:] = 1
 
+
 def test_make_imbalance_bad_ratio():
     """Test either if an error is raised with bad ratio
     argument"""
@@ -46,12 +47,14 @@ def test_make_imbalance_invalid_ratio():
     ratio = 0.5
     assert_raises(ValueError, make_imbalance, X, y_, ratio)
 
+
 def test_make_imbalance_single_class():
     """Test either if an error when there is a single class"""
     y_ = np.zeros((X.shape[0], ))
     ratio = 0.5
     assert_raises(ValueError, make_imbalance, X, y_, ratio)
 
+
 def test_make_imbalance_1():
     """Test make_imbalance"""
     X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1)
@@ -60,6 +63,7 @@ def test_make_imbalance_1():
     assert_equal(counter[1], 250)
     assert(np.all([X_i in X for X_i in X_]))
 
+
 def test_make_imbalance_2():
     """Test make_imbalance"""
     X_, y_ = make_imbalance(X, Y, ratio=0.25, min_c_=1)
@@ -68,6 +72,7 @@ def test_make_imbalance_2():
     assert_equal(counter[1], 125)
     assert(np.all([X_i in X for X_i in X_]))
 
+
 def test_make_imbalance_3():
     """Test make_imbalance"""
     X_, y_ = make_imbalance(X, Y, ratio=0.1, min_c_=1)
@@ -76,6 +81,7 @@ def test_make_imbalance_3():
     assert_equal(counter[1], 50)
     assert(np.all([X_i in X for X_i in X_]))
 
+
 def test_make_imbalance_4():
     """Test make_imbalance"""
     X_, y_ = make_imbalance(X, Y, ratio=0.01, min_c_=1)
@@ -84,6 +90,7 @@ def test_make_imbalance_4():
     assert_equal(counter[1], 5)
     assert(np.all([X_i in X for X_i in X_]))
 
+
 def test_make_imbalance_5():
     """Test make_imbalance"""
     X_, y_ = make_imbalance(X, Y, ratio=0.01, min_c_=0)
@@ -92,6 +99,7 @@ def test_make_imbalance_5():
     assert_equal(counter[0], 5)
     assert(np.all([X_i in X for X_i in X_]))
 
+
 def test_make_imbalance_multiclass():
     """Test make_imbalance with multiclass data"""
     # Make y to be multiclass
diff --git a/imblearn/ensemble/balance_cascade.py b/imblearn/ensemble/balance_cascade.py
@@ -186,7 +186,7 @@ def _sample(self, X, y):
         # Keep the indices of the minority class somewhere if we need to
         # return them later
         if self.return_indices:
-            idx_min = np.nonzero(y == self.min_c_)[0]
+            idx_min = np.flatnonzero(y == self.min_c_)
 
         # Condition to initiliase before the search
         b_subset_search = True
@@ -208,7 +208,7 @@ def _sample(self, X, y):
             # Generate an appropriate number of index to extract
             # from the majority class depending of the false classification
             # rate of the previous iteration
-            idx_sel_from_maj = random_state.choice(np.nonzero(b_sel_N)[0],
+            idx_sel_from_maj = random_state.choice(np.flatnonzero(b_sel_N),
                                                    size=num_samples,
                                                    replace=False)
             idx_sel_from_maj = np.concatenate((idx_mis_class,
@@ -254,8 +254,8 @@ def _sample(self, X, y):
             # next round
 
             # Find the misclassified index to keep them for the next round
-            idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
-                                                        N_y[idx_sel_from_maj])]
+            idx_mis_class = idx_sel_from_maj[np.flatnonzero(
+                pred_label != N_y[idx_sel_from_maj])]
             self.logger.debug('Elements misclassified: %s', idx_mis_class)
 
             # Count how many random element will be selected
@@ -275,7 +275,7 @@ def _sample(self, X, y):
                 if n_subsets == (self.n_max_subset - 1):
                     b_subset_search = False
                     # Select the remaining data
-                    idx_sel_from_maj = np.nonzero(b_sel_N)[0]
+                    idx_sel_from_maj = np.flatnonzero(b_sel_N)
                     idx_sel_from_maj = np.concatenate((idx_mis_class,
                                                        idx_sel_from_maj),
                                                       axis=0).astype(int)
@@ -305,7 +305,7 @@ def _sample(self, X, y):
             if num_samples > np.count_nonzero(b_sel_N):
                 b_subset_search = False
                 # Select the remaining data
-                idx_sel_from_maj = np.nonzero(b_sel_N)[0]
+                idx_sel_from_maj = np.flatnonzero(b_sel_N)
                 idx_sel_from_maj = np.concatenate((idx_mis_class,
                                                    idx_sel_from_maj),
                                                   axis=0).astype(int)
diff --git a/imblearn/under_sampling/condensed_nearest_neighbour.py b/imblearn/under_sampling/condensed_nearest_neighbour.py
@@ -135,7 +135,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(y == self.min_c_)[0]
+            idx_under = np.flatnonzero(y == self.min_c_)
 
         # Loop over the other classes under picking at random
         for key in self.stats_c_.keys():
@@ -198,7 +198,7 @@ def _sample(self, X, y):
                     pred_S_y = knn.predict(S_x)
                     good_classif_label = np.unique(
                         np.append(idx_maj_sample,
-                                  np.nonzero(pred_S_y == S_y)[0]))
+                                  np.flatnonzero(pred_S_y == S_y)))
 
             # Find the misclassified S_y
             sel_x = np.squeeze(S_x[idx_maj_sample, :])
diff --git a/imblearn/under_sampling/edited_nearest_neighbours.py b/imblearn/under_sampling/edited_nearest_neighbours.py
@@ -141,7 +141,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(y == self.min_c_)[0]
+            idx_under = np.flatnonzero(y == self.min_c_)
 
         # Create a k-NN to fit the whole data
         nn_obj = NearestNeighbors(n_neighbors=self.size_ngh + 1,
@@ -179,12 +179,12 @@ def _sample(self, X, y):
                 raise NotImplementedError
 
             # Get the samples which agree all together
-            sel_x = np.squeeze(sub_samples_x[np.nonzero(nnhood_bool), :])
-            sel_y = sub_samples_y[np.nonzero(nnhood_bool)]
+            sel_x = sub_samples_x[np.flatnonzero(nnhood_bool), :]
+            sel_y = sub_samples_y[np.flatnonzero(nnhood_bool)]
 
             # If we need to offer support for the indices selected
             if self.return_indices:
-                idx_tmp = np.nonzero(y == key)[0][np.nonzero(nnhood_bool)]
+                idx_tmp = np.flatnonzero(y == key)[np.flatnonzero(nnhood_bool)]
                 idx_under = np.concatenate((idx_under, idx_tmp), axis=0)
 
             X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
diff --git a/imblearn/under_sampling/instance_hardness_threshold.py b/imblearn/under_sampling/instance_hardness_threshold.py
@@ -210,7 +210,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(mask)[0]
+            idx_under = np.flatnonzero(mask)
             return X_resampled, y_resampled, idx_under
         else:
             return X_resampled, y_resampled
diff --git a/imblearn/under_sampling/nearmiss.py b/imblearn/under_sampling/nearmiss.py
@@ -183,7 +183,7 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
         sel_idx = sorted_idx[:num_samples]
 
         return (X[y == key][sel_idx], y[y == key][sel_idx],
-                np.nonzero(y == key)[0][sel_idx])
+                np.flatnonzero(y == key)[sel_idx])
 
     def _sample(self, X, y):
         """Resample the dataset.
@@ -212,9 +212,9 @@ def _sample(self, X, y):
 
         # Assign the parameter of the element of this class
         # Check that the version asked is implemented
-        if self.version not in [1,2,3]:
-            raise ValueError("Parameter 'version' must be 1, 2 or 3, " 
-	                     "got {0}".format(self.version))
+        if self.version not in [1, 2, 3]:
+            raise ValueError('Parameter `version` must be 1, 2 or 3, got'
+                             ' {}'.format(self.version))
 
         # Start with the minority class
         X_min = X[y == self.min_c_]
@@ -232,7 +232,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(y == self.min_c_)[0]
+            idx_under = np.flatnonzero(y == self.min_c_)
 
         # For each element of the current class, find the set of NN
         # of the minority class
diff --git a/imblearn/under_sampling/neighbourhood_cleaning_rule.py b/imblearn/under_sampling/neighbourhood_cleaning_rule.py
@@ -123,7 +123,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(y == self.min_c_)[0]
+            idx_under = np.flatnonzero(y == self.min_c_)
 
         # Create a k-NN to fit the whole data
         nn_obj = NearestNeighbors(n_neighbors=self.size_ngh,
@@ -140,7 +140,7 @@ def _sample(self, X, y):
             sub_samples_x = X[y == key]
 
             # Get the samples associated
-            idx_sub_sample = np.nonzero(y == key)[0]
+            idx_sub_sample = np.flatnonzero(y == key)
 
             # Find the NN for the current class
             nnhood_idx = nn_obj.kneighbors(sub_samples_x,
@@ -157,7 +157,7 @@ def _sample(self, X, y):
             if key == self.min_c_:
                 # Get the index to exclude
                 idx_to_exclude += nnhood_idx[np.nonzero(
-                    nnhood_label[np.nonzero(nnhood_bool)])].tolist()
+                    nnhood_label[np.flatnonzero(nnhood_bool)])].tolist()
             else:
                 # Get the index to exclude
                 idx_to_exclude += idx_sub_sample[np.nonzero(
@@ -173,12 +173,12 @@ def _sample(self, X, y):
         sel_idx[y == self.min_c_] = 0
 
         # Get the samples from the majority classes
-        sel_x = np.squeeze(X[np.nonzero(sel_idx), :])
-        sel_y = y[np.nonzero(sel_idx)]
+        sel_x = X[np.flatnonzero(sel_idx), :]
+        sel_y = y[np.flatnonzero(sel_idx)]
 
         # If we need to offer support for the indices selected
         if self.return_indices:
-            idx_tmp = np.nonzero(sel_idx)[0]
+            idx_tmp = np.flatnonzero(sel_idx)
             idx_under = np.concatenate((idx_under, idx_tmp), axis=0)
 
         X_resampled = np.concatenate((X_resampled, sel_x), axis=0)
diff --git a/imblearn/under_sampling/one_sided_selection.py b/imblearn/under_sampling/one_sided_selection.py
@@ -134,7 +134,7 @@ def _sample(self, X, y):
 
         # If we need to offer support for the indices
         if self.return_indices:
-            idx_under = np.nonzero(y == self.min_c_)[0]
+            idx_under = np.flatnonzero(y == self.min_c_)
 
         # Loop over the other classes under picking at random
         for key in self.stats_c_.keys():
@@ -177,14 +177,15 @@ def _sample(self, X, y):
             pred_S_y = knn.predict(S_x)
 
             # Find the misclassified S_y
-            sel_x = np.squeeze(S_x[np.nonzero(pred_S_y != S_y), :])
-            sel_y = S_y[np.nonzero(pred_S_y != S_y)]
+            sel_x = S_x[np.flatnonzero(pred_S_y != S_y), :]
+            sel_y = S_y[np.flatnonzero(pred_S_y != S_y)]
 
             # If we need to offer support for the indices selected
             # We concatenate the misclassified samples with the seed and the
             # minority samples
             if self.return_indices:
-                idx_tmp = np.nonzero(y == key)[0][np.nonzero(pred_S_y != S_y)]
+                idx_tmp = np.flatnonzero(y == key)[
+                    np.flatnonzero(pred_S_y != S_y)]
                 idx_under = np.concatenate((idx_under,
                                             idx_maj_sample,
                                             idx_tmp),
diff --git a/imblearn/under_sampling/tomek_links.py b/imblearn/under_sampling/tomek_links.py
@@ -172,7 +172,7 @@ def _sample(self, X, y):
         if self.return_indices:
             # Return the indices of interest
             return (X[np.logical_not(links)], y[np.logical_not(links)],
-                    np.nonzero(np.logical_not(links))[0])
+                    np.flatnonzero(np.logical_not(links)))
         else:
             # Return data set without majority Tomek links.
             return X[np.logical_not(links)], y[np.logical_not(links)]

Original file line number	Diff line number	Diff line change
`@@ -83,4 +83,3 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):`
`83`	`83`	`LOGGER.info('Make the dataset imbalanced: %s', Counter(y_resampled))`
`84`	`84`
`85`	`85`	`return X_resampled, y_resampled`
`86`		`-`