From 55d8f279d512e44861407086761079464699d74d Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Mon, 3 Feb 2020 12:29:55 +0200
Subject: [PATCH 1/9] Better in-out support for pandas.

---
 imblearn/base.py                              | 49 +++----------------
 .../over_sampling/_random_over_sampler.py     | 18 +------
 imblearn/over_sampling/_smote.py              | 18 +------
 .../_random_under_sampler.py                  | 18 +------
 imblearn/utils/_validation.py                 | 45 +++++++++++++++++
 imblearn/utils/estimator_checks.py            | 27 ++++++----
 6 files changed, 75 insertions(+), 100 deletions(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index c5d6b0185..13033ca40 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -14,6 +14,7 @@
 from sklearn.utils.multiclass import check_classification_targets
 
 from .utils import check_sampling_strategy, check_target_type
+from .utils._validation import _OutputReconstructor
 
 
 class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
@@ -80,21 +81,10 @@ def fit_resample(self, X, y):
 
         output = self._fit_resample(X, y)
 
-        if self._X_columns is not None or self._y_name is not None:
-            import pandas as pd
-
-        if self._X_columns is not None:
-            X_ = pd.DataFrame(output[0], columns=self._X_columns)
-            X_ = X_.astype(self._X_dtypes)
-        else:
-            X_ = output[0]
-
         y_ = (label_binarize(output[1], np.unique(y))
               if binarize_y else output[1])
 
-        if self._y_name is not None:
-            y_ = pd.Series(y_, dtype=self._y_dtype, name=self._y_name)
-
+        X_, y_ = self._reconstructor.reconstruct(output[0], y_)
         return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
 
     #  define an alias for back-compatibility
@@ -137,22 +127,7 @@ def __init__(self, sampling_strategy="auto"):
         self.sampling_strategy = sampling_strategy
 
     def _check_X_y(self, X, y, accept_sparse=None):
-        if hasattr(X, "loc"):
-            # store information to build dataframe
-            self._X_columns = X.columns
-            self._X_dtypes = X.dtypes
-        else:
-            self._X_columns = None
-            self._X_dtypes = None
-
-        if hasattr(y, "loc"):
-            # store information to build a series
-            self._y_name = y.name
-            self._y_dtype = y.dtype
-        else:
-            self._y_name = None
-            self._y_dtype = None
-
+        self._reconstructor = _OutputReconstructor(X, y)
         if accept_sparse is None:
             accept_sparse = ["csr", "csc"]
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
@@ -265,8 +240,8 @@ def fit_resample(self, X, y):
         y_resampled : array-like of shape (n_samples_new,)
             The corresponding label of `X_resampled`.
         """
-        # store the columns name to reconstruct a dataframe
-        self._columns = X.columns if hasattr(X, "loc") else None
+        self._reconstructor = _OutputReconstructor(X, y)
+
         if self.validate:
             check_classification_targets(y)
             X, y, binarize_y = self._check_X_y(
@@ -280,22 +255,12 @@ def fit_resample(self, X, y):
         output = self._fit_resample(X, y)
 
         if self.validate:
-            if self._X_columns is not None or self._y_name is not None:
-                import pandas as pd
-
-            if self._X_columns is not None:
-                X_ = pd.DataFrame(output[0], columns=self._X_columns)
-                X_ = X_.astype(self._X_dtypes)
-            else:
-                X_ = output[0]
 
             y_ = (label_binarize(output[1], np.unique(y))
                   if binarize_y else output[1])
-
-            if self._y_name is not None:
-                y_ = pd.Series(y_, dtype=self._y_dtype, name=self._y_name)
-
+            X_, y_ = self._reconstructor.reconstruct(output[0], y_)
             return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
+
         return output
 
     def _fit_resample(self, X, y):
diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py
index afcb89da5..8addb2a87 100644
--- a/imblearn/over_sampling/_random_over_sampler.py
+++ b/imblearn/over_sampling/_random_over_sampler.py
@@ -15,6 +15,7 @@
 from ..utils import check_target_type
 from ..utils import Substitution
 from ..utils._docstring import _random_state_docstring
+from ..utils._validation import _OutputReconstructor
 
 
 @Substitution(
@@ -75,22 +76,7 @@ def __init__(self, sampling_strategy="auto", random_state=None):
         self.random_state = random_state
 
     def _check_X_y(self, X, y):
-        if hasattr(X, "loc"):
-            # store information to build dataframe
-            self._X_columns = X.columns
-            self._X_dtypes = X.dtypes
-        else:
-            self._X_columns = None
-            self._X_dtypes = None
-
-        if hasattr(y, "loc"):
-            # store information to build a series
-            self._y_name = y.name
-            self._y_dtype = y.dtype
-        else:
-            self._y_name = None
-            self._y_dtype = None
-
+        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
                         force_all_finite=False)
diff --git a/imblearn/over_sampling/_smote.py b/imblearn/over_sampling/_smote.py
index cea14cfd2..8d8de9a0a 100644
--- a/imblearn/over_sampling/_smote.py
+++ b/imblearn/over_sampling/_smote.py
@@ -31,6 +31,7 @@
 from ..utils import Substitution
 from ..utils._docstring import _n_jobs_docstring
 from ..utils._docstring import _random_state_docstring
+from ..utils._validation import _OutputReconstructor
 
 
 class BaseSMOTE(BaseOverSampler):
@@ -891,22 +892,7 @@ def _check_X_y(self, X, y):
         """Overwrite the checking to let pass some string for categorical
         features.
         """
-        if hasattr(X, "loc"):
-            # store information to build dataframe
-            self._X_columns = X.columns
-            self._X_dtypes = X.dtypes
-        else:
-            self._X_columns = None
-            self._X_dtypes = None
-
-        if hasattr(y, "loc"):
-            # store information to build a series
-            self._y_name = y.name
-            self._y_dtype = y.dtype
-        else:
-            self._y_name = None
-            self._y_dtype = None
-
+        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X, y = check_X_y(X, y, accept_sparse=["csr", "csc"], dtype=None)
         return X, y, binarize_y
diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
index 8d7c08c93..fb90f2bfb 100644
--- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
+++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
@@ -15,6 +15,7 @@
 from ...utils import check_target_type
 from ...utils import Substitution
 from ...utils._docstring import _random_state_docstring
+from ...utils._validation import _OutputReconstructor
 
 
 @Substitution(
@@ -81,22 +82,7 @@ def __init__(
         self.replacement = replacement
 
     def _check_X_y(self, X, y):
-        if hasattr(X, "loc"):
-            # store information to build dataframe
-            self._X_columns = X.columns
-            self._X_dtypes = X.dtypes
-        else:
-            self._X_columns = None
-            self._X_dtypes = None
-
-        if hasattr(y, "loc"):
-            # store information to build a series
-            self._y_name = y.name
-            self._y_dtype = y.dtype
-        else:
-            self._y_name = None
-            self._y_dtype = None
-
+        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
                         force_all_finite=False)
diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py
index d1b0069b7..0f9f2f9c3 100644
--- a/imblearn/utils/_validation.py
+++ b/imblearn/utils/_validation.py
@@ -27,6 +27,51 @@
 TARGET_KIND = ("binary", "multiclass", "multilabel-indicator")
 
 
+class _OutputReconstructor:
+    """A class for converting input types to numpy and back."""
+
+    def __init__(self, X, y):
+        self.x_props = self._gets_props(X)
+        self.y_props = self._gets_props(y)
+
+    def reconstruct(self, X, y):
+        X = self._transfrom(X, self.x_props)
+        y = self._transfrom(y, self.y_props)
+        return X, y
+
+    def _gets_props(self, array):
+        props = {}
+        props["type"] = array.__class__.__name__
+        props["columns"] = getattr(array, "columns", None)
+        props["name"] = getattr(array, "name", None)
+        props["dtypes"] = getattr(array, "dtypes", None)
+        return props
+
+    def _transfrom(self, array, props):
+        type_ = props["type"].lower()
+        msg="Could not convert to {}".format(type_)
+        if type_ == "list":
+            ret = array.tolist()
+        elif type_ == "dataframe":
+            try:
+                import pandas as pd
+                ret = pd.DataFrame(array, columns=props["columns"])
+                ret = ret.astype(props["dtypes"])
+            except Exception:
+                warnings.warn(msg)
+        elif type_ == "series":
+            try:
+                import pandas as pd
+                ret = pd.Series(array,
+                                dtype=props["dtypes"],
+                                name=props["name"])
+            except Exception:
+                warnings.warn(msg)
+        else:
+            ret = array
+        return ret
+
+
 def check_neighbors_object(nn_name, nn_object, additional_neighbor=0):
     """Check the objects is consistent to be a NN.
 
diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 51a039f85..8f094397d 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -242,8 +242,9 @@ def check_samplers_pandas(name, Sampler):
         weights=[0.2, 0.3, 0.5],
         random_state=0,
     )
-    X_pd = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])
-    y_pd = pd.Series(y, name="class")
+    X_df = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])
+    y_df = pd.DataFrame(y)
+    y_s = pd.Series(y, name="class")
     sampler = Sampler()
     if isinstance(Sampler(), NearMiss):
         samplers = [Sampler(version=version) for version in (1, 2, 3)]
@@ -253,16 +254,22 @@ def check_samplers_pandas(name, Sampler):
 
     for sampler in samplers:
         set_random_state(sampler)
-        X_res_pd, y_res_pd = sampler.fit_resample(X_pd, y_pd)
+        X_res_df, y_res_s = sampler.fit_resample(X_df, y_s)
+        X_res_df, y_res_df = sampler.fit_resample(X_df, y_df)
         X_res, y_res = sampler.fit_resample(X, y)
 
-        # check that we return a pandas dataframe if a dataframe was given in
-        assert isinstance(X_res_pd, pd.DataFrame)
-        assert isinstance(y_res_pd, pd.Series)
-        assert X_pd.columns.to_list() == X_res_pd.columns.to_list()
-        assert y_pd.name == y_res_pd.name
-        assert_allclose(X_res_pd.to_numpy(), X_res)
-        assert_allclose(y_res_pd.to_numpy(), y_res)
+        # check that we return the same type for dataframes or seires types
+        assert isinstance(X_res_df, pd.DataFrame)
+        assert isinstance(y_res_df, pd.DataFrame)
+        assert isinstance(y_res_s, pd.Series)
+
+        assert X_df.columns.to_list() == X_res_df.columns.to_list()
+        assert y_df.columns.to_list() == y_res_df.columns.to_list()
+        assert y_s.name == y_res_s.name
+
+        assert_allclose(X_res_df.to_numpy(), X_res)
+        assert_allclose(y_res_df.to_numpy().ravel(), y_res)
+        assert_allclose(y_res_s.to_numpy(), y_res)
 
 
 def check_samplers_multiclass_ova(name, Sampler):

From 92dab4786fa368130d56021ef48097e169dd7435 Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Mon, 3 Feb 2020 13:45:07 +0200
Subject: [PATCH 2/9] Make pep8 happy!

---
 imblearn/utils/_validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py
index 0f9f2f9c3..0d02ea28b 100644
--- a/imblearn/utils/_validation.py
+++ b/imblearn/utils/_validation.py
@@ -49,7 +49,7 @@ def _gets_props(self, array):
 
     def _transfrom(self, array, props):
         type_ = props["type"].lower()
-        msg="Could not convert to {}".format(type_)
+        msg = "Could not convert to {}".format(type_)
         if type_ == "list":
             ret = array.tolist()
         elif type_ == "dataframe":

From 35d7af936aac6471c4097b14a8d958c2e3086040 Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Mon, 3 Feb 2020 14:04:35 +0200
Subject: [PATCH 3/9] Relax reconstructor checks. Add test for simple lists.

---
 imblearn/utils/_validation.py      | 19 +++++-------------
 imblearn/utils/estimator_checks.py | 32 +++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py
index 0d02ea28b..df25a52a0 100644
--- a/imblearn/utils/_validation.py
+++ b/imblearn/utils/_validation.py
@@ -49,24 +49,15 @@ def _gets_props(self, array):
 
     def _transfrom(self, array, props):
         type_ = props["type"].lower()
-        msg = "Could not convert to {}".format(type_)
         if type_ == "list":
             ret = array.tolist()
         elif type_ == "dataframe":
-            try:
-                import pandas as pd
-                ret = pd.DataFrame(array, columns=props["columns"])
-                ret = ret.astype(props["dtypes"])
-            except Exception:
-                warnings.warn(msg)
+            import pandas as pd
+            ret = pd.DataFrame(array, columns=props["columns"])
+            ret = ret.astype(props["dtypes"])
         elif type_ == "series":
-            try:
-                import pandas as pd
-                ret = pd.Series(array,
-                                dtype=props["dtypes"],
-                                name=props["name"])
-            except Exception:
-                warnings.warn(msg)
+            import pandas as pd
+            ret = pd.Series(array, dtype=props["dtypes"], name=props["name"])
         else:
             ret = array
         return ret
diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 8f094397d..5b5ef34fe 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -258,7 +258,7 @@ def check_samplers_pandas(name, Sampler):
         X_res_df, y_res_df = sampler.fit_resample(X_df, y_df)
         X_res, y_res = sampler.fit_resample(X, y)
 
-        # check that we return the same type for dataframes or seires types
+        # check that we return the same type for dataframes or series types
         assert isinstance(X_res_df, pd.DataFrame)
         assert isinstance(y_res_df, pd.DataFrame)
         assert isinstance(y_res_s, pd.Series)
@@ -272,6 +272,36 @@ def check_samplers_pandas(name, Sampler):
         assert_allclose(y_res_s.to_numpy(), y_res)
 
 
+def check_samplers_list(name, Sampler):
+    # Check that the samplers can handle simple lists
+    X, y = make_classification(
+        n_samples=1000,
+        n_classes=3,
+        n_informative=4,
+        weights=[0.2, 0.3, 0.5],
+        random_state=0,
+    )
+    X_list = X.tolist()
+    y_list = y.tolist()
+    sampler = Sampler()
+    if isinstance(Sampler(), NearMiss):
+        samplers = [Sampler(version=version) for version in (1, 2, 3)]
+
+    else:
+        samplers = [Sampler()]
+
+    for sampler in samplers:
+        set_random_state(sampler)
+        X_res, y_res = sampler.fit_resample(X, y)
+        X_res_list, y_res_list = sampler.fit_resample(X_list, y_list)
+
+        assert isinstance(X_res_list, list)
+        assert isinstance(y_res_list, list)
+
+        assert_allclose(X_res, X_res_list)
+        assert_allclose(y_res, y_res_list)
+
+
 def check_samplers_multiclass_ova(name, Sampler):
     # Check that multiclass target lead to the same results than OVA encoding
     X, y = make_classification(

From 182dc6e0266cc966fbbe73847636a49f20dde19f Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Mon, 3 Feb 2020 14:17:54 +0200
Subject: [PATCH 4/9] Add missing estimator check

---
 imblearn/utils/estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 5b5ef34fe..7d7901fa7 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -41,6 +41,7 @@ def _yield_sampler_checks(name, Estimator):
     yield check_samplers_sampling_strategy_fit_resample
     yield check_samplers_sparse
     yield check_samplers_pandas
+    yield check_samplers_list
     yield check_samplers_multiclass_ova
     yield check_samplers_preserve_dtype
     yield check_samplers_sample_indices

From 1fd289ae11170549f19fa7f564e571084b5d6c64 Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Mon, 3 Feb 2020 14:23:15 +0200
Subject: [PATCH 5/9] Fix a check

---
 imblearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 7d7901fa7..7bd77c2f3 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -285,11 +285,11 @@ def check_samplers_list(name, Sampler):
     X_list = X.tolist()
     y_list = y.tolist()
     sampler = Sampler()
-    if isinstance(Sampler(), NearMiss):
+    if isinstance(sampler, NearMiss):
         samplers = [Sampler(version=version) for version in (1, 2, 3)]
 
     else:
-        samplers = [Sampler()]
+        samplers = [sampler]
 
     for sampler in samplers:
         set_random_state(sampler)

From fa8c96830da07c66639b0e326d0a44c6167e06a9 Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Wed, 5 Feb 2020 11:25:46 +0200
Subject: [PATCH 6/9] Refactor

---
 imblearn/base.py                                       | 10 +++++-----
 imblearn/over_sampling/_random_over_sampler.py         |  3 ---
 imblearn/over_sampling/_smote.py                       |  2 --
 .../under_sampling/_prototype_selection/_nearmiss.py   |  6 ++++--
 .../_prototype_selection/_random_under_sampler.py      |  2 --
 imblearn/utils/_validation.py                          |  4 ++--
 6 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index 13033ca40..63bab5d74 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -14,7 +14,7 @@
 from sklearn.utils.multiclass import check_classification_targets
 
 from .utils import check_sampling_strategy, check_target_type
-from .utils._validation import _OutputReconstructor
+from .utils._validation import OutputFormater
 
 
 class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
@@ -73,6 +73,7 @@ def fit_resample(self, X, y):
             The corresponding label of `X_resampled`.
         """
         check_classification_targets(y)
+        self._formater = OutputFormater(X, y)
         X, y, binarize_y = self._check_X_y(X, y)
 
         self.sampling_strategy_ = check_sampling_strategy(
@@ -84,7 +85,7 @@ def fit_resample(self, X, y):
         y_ = (label_binarize(output[1], np.unique(y))
               if binarize_y else output[1])
 
-        X_, y_ = self._reconstructor.reconstruct(output[0], y_)
+        X_, y_ = self._formater.format(output[0], y_)
         return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
 
     #  define an alias for back-compatibility
@@ -127,7 +128,6 @@ def __init__(self, sampling_strategy="auto"):
         self.sampling_strategy = sampling_strategy
 
     def _check_X_y(self, X, y, accept_sparse=None):
-        self._reconstructor = _OutputReconstructor(X, y)
         if accept_sparse is None:
             accept_sparse = ["csr", "csc"]
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
@@ -240,7 +240,7 @@ def fit_resample(self, X, y):
         y_resampled : array-like of shape (n_samples_new,)
             The corresponding label of `X_resampled`.
         """
-        self._reconstructor = _OutputReconstructor(X, y)
+        self._formater = OutputFormater(X, y)
 
         if self.validate:
             check_classification_targets(y)
@@ -258,7 +258,7 @@ def fit_resample(self, X, y):
 
             y_ = (label_binarize(output[1], np.unique(y))
                   if binarize_y else output[1])
-            X_, y_ = self._reconstructor.reconstruct(output[0], y_)
+            X_, y_ = self._formater.format(output[0], y_)
             return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
 
         return output
diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py
index 8addb2a87..fbe2f17f9 100644
--- a/imblearn/over_sampling/_random_over_sampler.py
+++ b/imblearn/over_sampling/_random_over_sampler.py
@@ -15,8 +15,6 @@
 from ..utils import check_target_type
 from ..utils import Substitution
 from ..utils._docstring import _random_state_docstring
-from ..utils._validation import _OutputReconstructor
-
 
 @Substitution(
     sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
@@ -76,7 +74,6 @@ def __init__(self, sampling_strategy="auto", random_state=None):
         self.random_state = random_state
 
     def _check_X_y(self, X, y):
-        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
                         force_all_finite=False)
diff --git a/imblearn/over_sampling/_smote.py b/imblearn/over_sampling/_smote.py
index 8d8de9a0a..961ce55c5 100644
--- a/imblearn/over_sampling/_smote.py
+++ b/imblearn/over_sampling/_smote.py
@@ -31,7 +31,6 @@
 from ..utils import Substitution
 from ..utils._docstring import _n_jobs_docstring
 from ..utils._docstring import _random_state_docstring
-from ..utils._validation import _OutputReconstructor
 
 
 class BaseSMOTE(BaseOverSampler):
@@ -892,7 +891,6 @@ def _check_X_y(self, X, y):
         """Overwrite the checking to let pass some string for categorical
         features.
         """
-        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X, y = check_X_y(X, y, accept_sparse=["csr", "csc"], dtype=None)
         return X, y, binarize_y
diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py
index af8a13dde..386463d5c 100644
--- a/imblearn/under_sampling/_prototype_selection/_nearmiss.py
+++ b/imblearn/under_sampling/_prototype_selection/_nearmiss.py
@@ -247,8 +247,10 @@ def _fit_resample(self, X, y):
                         _safe_indexing(X, minority_class_indices)
                     )
                     idx_vec_farthest = np.unique(idx_vec.reshape(-1))
-                    X_class_selected = _safe_indexing(X_class, idx_vec_farthest)
-                    y_class_selected = _safe_indexing(y_class, idx_vec_farthest)
+                    X_class_selected = _safe_indexing(
+                        X_class, idx_vec_farthest)
+                    y_class_selected = _safe_indexing(
+                        y_class, idx_vec_farthest)
 
                     dist_vec, idx_vec = self.nn_.kneighbors(
                         X_class_selected, n_neighbors=self.nn_.n_neighbors
diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
index fb90f2bfb..900d8e3fe 100644
--- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
+++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
@@ -15,7 +15,6 @@
 from ...utils import check_target_type
 from ...utils import Substitution
 from ...utils._docstring import _random_state_docstring
-from ...utils._validation import _OutputReconstructor
 
 
 @Substitution(
@@ -82,7 +81,6 @@ def __init__(
         self.replacement = replacement
 
     def _check_X_y(self, X, y):
-        self._reconstructor = _OutputReconstructor(X, y)
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X = check_array(X, accept_sparse=["csr", "csc"], dtype=None,
                         force_all_finite=False)
diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py
index df25a52a0..c07474d95 100644
--- a/imblearn/utils/_validation.py
+++ b/imblearn/utils/_validation.py
@@ -27,14 +27,14 @@
 TARGET_KIND = ("binary", "multiclass", "multilabel-indicator")
 
 
-class _OutputReconstructor:
+class OutputFormater:
     """A class for converting input types to numpy and back."""
 
     def __init__(self, X, y):
         self.x_props = self._gets_props(X)
         self.y_props = self._gets_props(y)
 
-    def reconstruct(self, X, y):
+    def format(self, X, y):
         X = self._transfrom(X, self.x_props)
         y = self._transfrom(y, self.y_props)
         return X, y

From 8cb2a92dcc4badc6b1f744d2ebf85338859584fb Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Wed, 5 Feb 2020 11:35:15 +0200
Subject: [PATCH 7/9] Add tests

---
 imblearn/utils/tests/test_validation.py | 41 +++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index 634f502f0..76955d254 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -17,6 +17,7 @@
 from imblearn.utils import check_neighbors_object
 from imblearn.utils import check_sampling_strategy
 from imblearn.utils import check_target_type
+from imblearn.utils._validation import OutputFormater
 
 multiclass_target = np.array([1] * 50 + [2] * 100 + [3] * 25)
 binary_target = np.array([1] * 25 + [0] * 100)
@@ -315,3 +316,43 @@ def test_sampling_strategy_check_order(
         sampling_strategy, y, sampling_type
     )
     assert sampling_strategy_ == expected_result
+
+
+def test_output_formater_plain_list():
+    X = np.array([[0, 0], [1, 1]])
+    y = np.array([[0, 0], [1, 1]])
+
+    formater = OutputFormater(X.tolist(), y.tolist())
+    X_res, y_res = formater.format(X, y)
+    assert isinstance(X_res, list)
+    assert isinstance(y_res, list)
+
+
+def test_output_formater_pandas():
+    pd = pytest.importorskip("pandas")
+
+    X = np.array([[0, 0], [1, 1]])
+    y = np.array([0, 1])
+
+    X_df = pd.DataFrame(X, columns=["a", "b"])
+    X_df = X_df.astype(int)
+    y_df = pd.DataFrame(y, columns=["target", ])
+    y_df = y_df.astype(int)
+    y_s = pd.Series(y, name="target", dtype=int)
+
+    # DataFrame and DataFrame case
+    formater = OutputFormater(X_df, y_df)
+    X_res, y_res = formater.format(X, y)
+    assert isinstance(X_res, pd.DataFrame)
+    assert_array_equal(X_res.columns, X_df.columns)
+    assert_array_equal(X_res.dtypes, X_df.dtypes)
+    assert isinstance(y_res, pd.DataFrame)
+    assert_array_equal(y_res.columns, y_df.columns)
+    assert_array_equal(y_res.dtypes, y_df.dtypes)
+
+    # DataFrames and Series case
+    formater = OutputFormater(X_df, y_s)
+    _, y_res = formater.format(X, y)
+    assert isinstance(y_res, pd.Series)
+    assert_array_equal(y_res.name, y_s.name)
+    assert_array_equal(y_res.dtype, y_s.dtype)

From d9edfc4fc86c0126bf5245bf0a48b26abc36ecce Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Fri, 7 Feb 2020 15:59:49 +0200
Subject: [PATCH 8/9] Add test, rename class and detach the instance from the
 sampler

---
 imblearn/base.py                        | 10 ++++-----
 imblearn/utils/_validation.py           | 12 +++++------
 imblearn/utils/tests/test_validation.py | 28 +++++++++++++++++--------
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index 63bab5d74..014e4dd9f 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -14,7 +14,7 @@
 from sklearn.utils.multiclass import check_classification_targets
 
 from .utils import check_sampling_strategy, check_target_type
-from .utils._validation import OutputFormater
+from .utils._validation import ArraysTransformer
 
 
 class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
@@ -73,7 +73,7 @@ def fit_resample(self, X, y):
             The corresponding label of `X_resampled`.
         """
         check_classification_targets(y)
-        self._formater = OutputFormater(X, y)
+        arrays_transformer = ArraysTransformer(X, y)
         X, y, binarize_y = self._check_X_y(X, y)
 
         self.sampling_strategy_ = check_sampling_strategy(
@@ -85,7 +85,7 @@ def fit_resample(self, X, y):
         y_ = (label_binarize(output[1], np.unique(y))
               if binarize_y else output[1])
 
-        X_, y_ = self._formater.format(output[0], y_)
+        X_, y_ = arrays_transformer.transform(output[0], y_)
         return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
 
     #  define an alias for back-compatibility
@@ -240,7 +240,7 @@ def fit_resample(self, X, y):
         y_resampled : array-like of shape (n_samples_new,)
             The corresponding label of `X_resampled`.
         """
-        self._formater = OutputFormater(X, y)
+        arrays_transformer = ArraysTransformer(X, y)
 
         if self.validate:
             check_classification_targets(y)
@@ -258,7 +258,7 @@ def fit_resample(self, X, y):
 
             y_ = (label_binarize(output[1], np.unique(y))
                   if binarize_y else output[1])
-            X_, y_ = self._formater.format(output[0], y_)
+            X_, y_ = arrays_transformer.transform(output[0], y_)
             return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
 
         return output
diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py
index c07474d95..dccc0dd4d 100644
--- a/imblearn/utils/_validation.py
+++ b/imblearn/utils/_validation.py
@@ -27,16 +27,16 @@
 TARGET_KIND = ("binary", "multiclass", "multilabel-indicator")
 
 
-class OutputFormater:
-    """A class for converting input types to numpy and back."""
+class ArraysTransformer:
+    """A class to convert sampler output arrays to their original types."""
 
     def __init__(self, X, y):
         self.x_props = self._gets_props(X)
         self.y_props = self._gets_props(y)
 
-    def format(self, X, y):
-        X = self._transfrom(X, self.x_props)
-        y = self._transfrom(y, self.y_props)
+    def transform(self, X, y):
+        X = self._transfrom_one(X, self.x_props)
+        y = self._transfrom_one(y, self.y_props)
         return X, y
 
     def _gets_props(self, array):
@@ -47,7 +47,7 @@ def _gets_props(self, array):
         props["dtypes"] = getattr(array, "dtypes", None)
         return props
 
-    def _transfrom(self, array, props):
+    def _transfrom_one(self, array, props):
         type_ = props["type"].lower()
         if type_ == "list":
             ret = array.tolist()
diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index 76955d254..3b4571862 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -17,7 +17,7 @@
 from imblearn.utils import check_neighbors_object
 from imblearn.utils import check_sampling_strategy
 from imblearn.utils import check_target_type
-from imblearn.utils._validation import OutputFormater
+from imblearn.utils._validation import ArraysTransformer
 
 multiclass_target = np.array([1] * 50 + [2] * 100 + [3] * 25)
 binary_target = np.array([1] * 25 + [0] * 100)
@@ -318,17 +318,27 @@ def test_sampling_strategy_check_order(
     assert sampling_strategy_ == expected_result
 
 
-def test_output_formater_plain_list():
+def test_arrays_transformer_plain_list():
     X = np.array([[0, 0], [1, 1]])
     y = np.array([[0, 0], [1, 1]])
 
-    formater = OutputFormater(X.tolist(), y.tolist())
-    X_res, y_res = formater.format(X, y)
+    arrays_transformer = ArraysTransformer(X.tolist(), y.tolist())
+    X_res, y_res = arrays_transformer.transform(X, y)
     assert isinstance(X_res, list)
     assert isinstance(y_res, list)
 
 
-def test_output_formater_pandas():
+def test_arrays_transformer_numpy():
+    X = np.array([[0, 0], [1, 1]])
+    y = np.array([[0, 0], [1, 1]])
+
+    arrays_transformer = ArraysTransformer(X, y)
+    X_res, y_res = arrays_transformer.transform(X, y)
+    assert isinstance(X_res, np.array)
+    assert isinstance(y_res, np.array)
+
+
+def test_arrays_transformer_pandas():
     pd = pytest.importorskip("pandas")
 
     X = np.array([[0, 0], [1, 1]])
@@ -341,8 +351,8 @@ def test_output_formater_pandas():
     y_s = pd.Series(y, name="target", dtype=int)
 
     # DataFrame and DataFrame case
-    formater = OutputFormater(X_df, y_df)
-    X_res, y_res = formater.format(X, y)
+    arrays_transformer = ArraysTransformer(X_df, y_df)
+    X_res, y_res = arrays_transformer.transform(X, y)
     assert isinstance(X_res, pd.DataFrame)
     assert_array_equal(X_res.columns, X_df.columns)
     assert_array_equal(X_res.dtypes, X_df.dtypes)
@@ -351,8 +361,8 @@ def test_output_formater_pandas():
     assert_array_equal(y_res.dtypes, y_df.dtypes)
 
     # DataFrames and Series case
-    formater = OutputFormater(X_df, y_s)
-    _, y_res = formater.format(X, y)
+    arrays_transformer = ArraysTransformer(X_df, y_s)
+    _, y_res = arrays_transformer.transform(X, y)
     assert isinstance(y_res, pd.Series)
     assert_array_equal(y_res.name, y_s.name)
     assert_array_equal(y_res.dtype, y_s.dtype)

From c5f40b46bff8cf7a4ed2fc3fc869b2c378c27efd Mon Sep 17 00:00:00 2001
From: chkoar <ichkoar@gmail.com>
Date: Fri, 7 Feb 2020 16:21:26 +0200
Subject: [PATCH 9/9] Fix type

---
 imblearn/utils/tests/test_validation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index 3b4571862..a40b47f4b 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -334,8 +334,8 @@ def test_arrays_transformer_numpy():
 
     arrays_transformer = ArraysTransformer(X, y)
     X_res, y_res = arrays_transformer.transform(X, y)
-    assert isinstance(X_res, np.array)
-    assert isinstance(y_res, np.array)
+    assert isinstance(X_res, np.ndarray)
+    assert isinstance(y_res, np.ndarray)
 
 
 def test_arrays_transformer_pandas():