@@ -159,3 +159,107 @@ def fit(self, X, y):
159
159
self .ratio_ = check_ratio (self .ratio , y , self ._sampling_type )
160
160
161
161
return self
162
+
163
+
164
+ def _identity (X , y ):
165
+ return X , y
166
+
167
+
168
class FunctionSampler(SamplerMixin):
    """Construct a sampler from calling an arbitrary callable.

    Read more in the :ref:`User Guide <function_sampler>`.

    Parameters
    ----------
    func : callable or None, optional (default=None)
        The callable used to resample the data. It is called as
        ``func(X, y, **kw_args)`` and its return value is returned by
        ``sample``. If ``func`` is None, the identity function is used and
        ``X`` and ``y`` are returned as validated.

    accept_sparse : bool, optional (default=True)
        Whether sparse inputs are supported. By default, sparse inputs
        (CSR and CSC formats) are supported.

    kw_args : dict, optional (default=None)
        The keyword arguments forwarded to ``func``.

    Notes
    -----

    See
    :ref:`sphx_glr_auto_examples_plot_outlier_rejections.py`

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_classification
    >>> from imblearn import FunctionSampler
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

    We can define a function which selects only the first ten samples, for
    instance.

    >>> def func(X, y):
    ...     return X[:10], y[:10]
    >>> sampler = FunctionSampler(func=func)
    >>> X_res, y_res = sampler.fit_sample(X, y)
    >>> np.all(X_res == X[:10])
    True
    >>> np.all(y_res == y[:10])
    True

    We can also create a specific function which takes some arguments.

    >>> from collections import Counter
    >>> from imblearn.under_sampling import RandomUnderSampler
    >>> def func(X, y, ratio, random_state):
    ...     return RandomUnderSampler(ratio=ratio,
    ...                               random_state=random_state).fit_sample(X, y)
    >>> sampler = FunctionSampler(func=func,
    ...                           kw_args={'ratio': 'auto', 'random_state': 0})
    >>> X_res, y_res = sampler.fit_sample(X, y)
    >>> print('Resampled dataset shape {}'.format(
    ...     sorted(Counter(y_res).items())))
    Resampled dataset shape [(0, 100), (1, 100)]

    """

    def __init__(self, func=None, accept_sparse=True, kw_args=None):
        self.func = func
        self.accept_sparse = accept_sparse
        self.kw_args = kw_args
        self.logger = logging.getLogger(__name__)

    def _check_X_y(self, X, y):
        """Validate ``X`` and ``y`` and return the validated pair.

        Sparse ``X`` is accepted in CSR/CSC format only when
        ``accept_sparse`` is truthy; the target is then passed through
        ``check_target_type``.
        """
        sparse_formats = ['csr', 'csc'] if self.accept_sparse else False
        X, y = check_X_y(X, y, accept_sparse=sparse_formats)
        y = check_target_type(y)

        return X, y

    def fit(self, X, y):
        """Record the hashes of the training data and mark the sampler fitted.

        Parameters
        ----------
        X : array-like or sparse matrix
            Data to be resampled later on.

        y : array-like
            Corresponding targets.

        Returns
        -------
        self : object
            Return self.

        """
        X, y = self._check_X_y(X, y)
        # The hashes are compared again in ``_sample`` so that resampling is
        # only allowed on the very data seen here.
        self.X_hash_, self.y_hash_ = hash_X_y(X, y)
        # when using a sampler, ratio_ is supposed to exist after fit
        self.ratio_ = 'is_fitted'

        return self

    def _sample(self, X, y, func=None, kw_args=None):
        """Validate the inputs and apply ``func`` to them.

        Parameters
        ----------
        X : array-like or sparse matrix
            Data to resample; must hash to the same value as the data given
            to ``fit``.

        y : array-like
            Corresponding targets; same constraint as ``X``.

        func : callable or None, optional (default=None)
            Callable invoked as ``func(X, y, **kw_args)``. The identity
            function is used when None.

        kw_args : dict or None, optional (default=None)
            Keyword arguments forwarded to ``func``.

        Returns
        -------
        The value returned by ``func``.

        Raises
        ------
        RuntimeError
            If ``X`` or ``y`` does not hash to the value recorded by ``fit``.

        """
        X, y = self._check_X_y(X, y)
        check_is_fitted(self, 'ratio_')
        X_hash, y_hash = hash_X_y(X, y)
        if self.X_hash_ != X_hash or self.y_hash_ != y_hash:
            raise RuntimeError("X and y need to be same array earlier fitted.")

        if func is None:
            func = _identity

        # Guard on the argument that is actually unpacked: testing
        # ``self.kw_args`` here could unpack ``None`` or silently drop the
        # keyword arguments supplied by the caller.
        return func(X, y, **(kw_args if kw_args else {}))

    def sample(self, X, y):
        """Resample ``X`` and ``y`` by calling ``func`` with ``kw_args``.

        Parameters
        ----------
        X : array-like or sparse matrix
            Data to resample; must be the data passed to ``fit``.

        y : array-like
            Corresponding targets.

        Returns
        -------
        The value returned by ``func`` (``(X, y)`` when ``func`` is None).

        """
        return self._sample(X, y, func=self.func, kw_args=self.kw_args)
0 commit comments