@@ -159,3 +159,107 @@ def fit(self, X, y):
159159 self .ratio_ = check_ratio (self .ratio , y , self ._sampling_type )
160160
161161 return self
162+
163+
164+ def _identity (X , y ):
165+ return X , y
166+
167+
class FunctionSampler(SamplerMixin):
    """Construct a sampler from calling an arbitrary callable.

    Read more in the :ref:`User Guide <function_sampler>`.

    Parameters
    ----------
    func : callable or None, optional (default=None)
        The callable to use for the resampling. It is called as
        ``func(X, y, **kw_args)`` with the validated ``X`` and ``y`` given
        to ``sample``, and its return value is returned as is. If func is
        None, then func will be the identity function.

    accept_sparse : bool, optional (default=True)
        Whether sparse inputs are supported. By default, sparse inputs
        (CSR and CSC formats) are supported.

    kw_args : dict, optional (default=None)
        The keyword arguments expected by ``func``.

    Notes
    -----
    ``sample`` has to be called with the same ``X`` and ``y`` which were
    given to ``fit``; otherwise a ``RuntimeError`` is raised.

    See
    :ref:`sphx_glr_auto_examples_plot_outlier_rejections.py`

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_classification
    >>> from imblearn import FunctionSampler
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

    We can create a sampler which selects only the first ten samples, for
    instance.

    >>> def func(X, y):
    ...     return X[:10], y[:10]
    >>> sampler = FunctionSampler(func=func)
    >>> X_res, y_res = sampler.fit_sample(X, y)
    >>> np.all(X_res == X[:10])
    True
    >>> np.all(y_res == y[:10])
    True

    We can also create a specific function which takes some arguments.

    >>> from collections import Counter
    >>> from imblearn.under_sampling import RandomUnderSampler
    >>> def func(X, y, ratio, random_state):
    ...     return RandomUnderSampler(ratio=ratio,
    ...         random_state=random_state).fit_sample(X, y)
    >>> sampler = FunctionSampler(func=func,
    ...     kw_args={'ratio': 'auto', 'random_state': 0})
    >>> X_res, y_res = sampler.fit_sample(X, y)
    >>> print('Resampled dataset shape {}'.format(
    ...     sorted(Counter(y_res).items())))
    Resampled dataset shape [(0, 100), (1, 100)]

    """

    def __init__(self, func=None, accept_sparse=True, kw_args=None):
        self.func = func
        self.accept_sparse = accept_sparse
        self.kw_args = kw_args
        self.logger = logging.getLogger(__name__)

    def _check_X_y(self, X, y):
        """Validate ``X`` and ``y`` and return the validated pair.

        Sparse ``X`` is accepted in CSR or CSC format only when
        ``accept_sparse`` is true; ``y`` additionally goes through
        ``check_target_type``.
        """
        sparse_formats = ['csr', 'csc'] if self.accept_sparse else False
        X, y = check_X_y(X, y, accept_sparse=sparse_formats)
        y = check_target_type(y)

        return X, y

    def fit(self, X, y):
        """Record a hash of ``X`` and ``y`` so ``sample`` can verify them.

        Parameters
        ----------
        X : array-like or sparse matrix
            The samples which will later be given to ``sample``.

        y : array-like
            The corresponding targets.

        Returns
        -------
        self : object
            Return self.

        """
        X, y = self._check_X_y(X, y)
        self.X_hash_, self.y_hash_ = hash_X_y(X, y)
        # when using a sampler, ratio_ is supposed to exist after fit
        self.ratio_ = 'is_fitted'

        return self

    def _sample(self, X, y, func=None, kw_args=None):
        """Apply ``func`` to the validated ``X`` and ``y``.

        Parameters
        ----------
        X : array-like or sparse matrix
            The samples; they must hash to the same value as those fitted.

        y : array-like
            The targets; they must hash to the same value as those fitted.

        func : callable or None, optional (default=None)
            The callable to apply. The identity function is used when None.

        kw_args : dict or None, optional (default=None)
            Keyword arguments forwarded to ``func``.

        Returns
        -------
        The value returned by ``func(X, y, **kw_args)``.

        Raises
        ------
        RuntimeError
            If ``X`` or ``y`` differ from the data given to ``fit``.

        """
        X, y = self._check_X_y(X, y)
        check_is_fitted(self, 'ratio_')
        X_hash, y_hash = hash_X_y(X, y)
        if self.X_hash_ != X_hash or self.y_hash_ != y_hash:
            raise RuntimeError("X and y need to be same array earlier fitted.")

        if func is None:
            func = _identity

        # Decide on the ``kw_args`` argument itself rather than on
        # ``self.kw_args``: this never unpacks ``None`` and never drops
        # keyword arguments which were passed explicitly.
        return func(X, y, **(kw_args if kw_args else {}))

    def sample(self, X, y):
        """Resample ``X`` and ``y`` by calling ``func`` with ``kw_args``.

        Parameters
        ----------
        X : array-like or sparse matrix
            The samples, identical to those given to ``fit``.

        y : array-like
            The targets, identical to those given to ``fit``.

        Returns
        -------
        The value returned by ``func``; ``(X, y)`` when ``func`` is None.

        """
        return self._sample(X, y, func=self.func, kw_args=self.kw_args)
0 commit comments