Skip to content

Commit bea1915

Browse files
MattEding authored and glemaitre committed
MNT Pipeline Refactor - Reduce Code Footprint (#654)
1 parent a0ac84d commit bea1915

File tree

1 file changed

+22
-265
lines changed

1 file changed

+22
-265
lines changed

imblearn/pipeline.py

+22-265
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,9 @@
1212
# Christos Aridas
1313
# Guillaume Lemaitre <[email protected]>
1414
# License: BSD
15-
1615
from sklearn import pipeline
1716
from sklearn.base import clone
18-
from sklearn.utils import Bunch, _print_elapsed_time
17+
from sklearn.utils import _print_elapsed_time
1918
from sklearn.utils.metaestimators import if_delegate_has_method
2019
from sklearn.utils.validation import check_memory
2120

@@ -145,7 +144,8 @@ def _validate_steps(self):
145144
):
146145
raise TypeError(
147146
"All intermediate steps of the chain should "
148-
"be estimators that implement fit and transform or sample."
147+
"be estimators that implement fit and transform or "
148+
"fit_resample."
149149
" '%s' implements both)" % (t)
150150
)
151151

@@ -167,6 +167,21 @@ def _validate_steps(self):
167167
% (estimator, type(estimator))
168168
)
169169

170+
def _iter(
171+
self, with_final=True, filter_passthrough=True, filter_resample=True
172+
):
173+
"""Generate (idx, (name, trans)) tuples from self.steps.
174+
175+
When `filter_passthrough` is `True`, 'passthrough' and None
176+
transformers are filtered out. When `filter_resample` is `True`,
177+
estimator with a method `fit_resample` are filtered out.
178+
"""
179+
it = super()._iter(with_final, filter_passthrough)
180+
if filter_resample:
181+
return filter(lambda x: not hasattr(x[-1], "fit_resample"), it)
182+
else:
183+
return it
184+
170185
# Estimator interface
171186

172187
def _fit(self, X, y=None, **fit_params):
@@ -175,7 +190,7 @@ def _fit(self, X, y=None, **fit_params):
175190
# Setup the memory
176191
memory = check_memory(self.memory)
177192

178-
fit_transform_one_cached = memory.cache(_fit_transform_one)
193+
fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
179194
fit_resample_one_cached = memory.cache(_fit_resample_one)
180195

181196
fit_params_steps = {
@@ -194,7 +209,8 @@ def _fit(self, X, y=None, **fit_params):
194209
for (step_idx,
195210
name,
196211
transformer) in self._iter(with_final=False,
197-
filter_passthrough=False):
212+
filter_passthrough=False,
213+
filter_resample=False):
198214
if (transformer is None or transformer == 'passthrough'):
199215
with _print_elapsed_time('Pipeline',
200216
self._log_message(step_idx)):
@@ -208,7 +224,7 @@ def _fit(self, X, y=None, **fit_params):
208224
else:
209225
cloned_transformer = clone(transformer)
210226
elif hasattr(memory, "cachedir"):
211-
# joblib < 0.11
227+
# joblib <= 0.11
212228
if memory.cachedir is None:
213229
# we do not clone when caching is disabled to
214230
# preserve backward compatibility
@@ -354,38 +370,6 @@ def fit_resample(self, X, y=None, **fit_params):
354370
elif hasattr(last_step, "fit_resample"):
355371
return last_step.fit_resample(Xt, yt, **fit_params)
356372

357-
@if_delegate_has_method(delegate="_final_estimator")
358-
def predict(self, X, **predict_params):
359-
"""Apply transformers/samplers to the data, and predict with the final
360-
estimator
361-
362-
Parameters
363-
----------
364-
X : iterable
365-
Data to predict on. Must fulfill input requirements of first step
366-
of the pipeline.
367-
368-
**predict_params : dict of string -> object
369-
Parameters to the ``predict`` called at the end of all
370-
transformations in the pipeline. Note that while this may be
371-
used to return uncertainties from some models with return_std
372-
or return_cov, uncertainties that are generated by the
373-
transformations in the pipeline are not propagated to the
374-
final estimator.
375-
376-
Returns
377-
-------
378-
y_pred : array-like
379-
380-
"""
381-
Xt = X
382-
for _, _, transform in self._iter(with_final=False):
383-
if hasattr(transform, "fit_resample"):
384-
pass
385-
else:
386-
Xt = transform.transform(Xt)
387-
return self.steps[-1][-1].predict(Xt, **predict_params)
388-
389373
@if_delegate_has_method(delegate="_final_estimator")
390374
def fit_predict(self, X, y=None, **fit_params):
391375
"""Applies fit_predict of last step in pipeline after transforms.
@@ -419,233 +403,6 @@ def fit_predict(self, X, y=None, **fit_params):
419403
y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
420404
return y_pred
421405

422-
@if_delegate_has_method(delegate="_final_estimator")
423-
def predict_proba(self, X):
424-
"""Apply transformers/samplers, and predict_proba of the final
425-
estimator
426-
427-
Parameters
428-
----------
429-
X : iterable
430-
Data to predict on. Must fulfill input requirements of first step
431-
of the pipeline.
432-
433-
Returns
434-
-------
435-
y_proba : array-like, shape = [n_samples, n_classes]
436-
437-
"""
438-
Xt = X
439-
for _, _, transform in self._iter(with_final=False):
440-
if hasattr(transform, "fit_resample"):
441-
pass
442-
else:
443-
Xt = transform.transform(Xt)
444-
return self.steps[-1][-1].predict_proba(Xt)
445-
446-
@if_delegate_has_method(delegate="_final_estimator")
447-
def score_samples(self, X):
448-
"""Apply transforms, and score_samples of the final estimator.
449-
Parameters
450-
----------
451-
X : iterable
452-
Data to predict on. Must fulfill input requirements of first step
453-
of the pipeline.
454-
Returns
455-
-------
456-
y_score : ndarray, shape (n_samples,)
457-
"""
458-
Xt = X
459-
for _, _, transformer in self._iter(with_final=False):
460-
if hasattr(transformer, "fit_resample"):
461-
pass
462-
else:
463-
Xt = transformer.transform(Xt)
464-
return self.steps[-1][-1].score_samples(Xt)
465-
466-
@if_delegate_has_method(delegate="_final_estimator")
467-
def decision_function(self, X):
468-
"""Apply transformers/samplers, and decision_function of the final
469-
estimator
470-
471-
Parameters
472-
----------
473-
X : iterable
474-
Data to predict on. Must fulfill input requirements of first step
475-
of the pipeline.
476-
477-
Returns
478-
-------
479-
y_score : array-like, shape = [n_samples, n_classes]
480-
481-
"""
482-
Xt = X
483-
for _, _, transform in self._iter(with_final=False):
484-
if hasattr(transform, "fit_resample"):
485-
pass
486-
else:
487-
Xt = transform.transform(Xt)
488-
return self.steps[-1][-1].decision_function(Xt)
489-
490-
@if_delegate_has_method(delegate="_final_estimator")
491-
def predict_log_proba(self, X):
492-
"""Apply transformers/samplers, and predict_log_proba of the final
493-
estimator
494-
495-
Parameters
496-
----------
497-
X : iterable
498-
Data to predict on. Must fulfill input requirements of first step
499-
of the pipeline.
500-
501-
Returns
502-
-------
503-
y_score : array-like, shape = [n_samples, n_classes]
504-
505-
"""
506-
Xt = X
507-
for _, _, transform in self._iter(with_final=False):
508-
if hasattr(transform, "fit_resample"):
509-
pass
510-
else:
511-
Xt = transform.transform(Xt)
512-
return self.steps[-1][-1].predict_log_proba(Xt)
513-
514-
@property
515-
def transform(self):
516-
"""Apply transformers/samplers, and transform with the final estimator
517-
518-
This also works where final estimator is ``None``: all prior
519-
transformations are applied.
520-
521-
Parameters
522-
----------
523-
X : iterable
524-
Data to transform. Must fulfill input requirements of first step
525-
of the pipeline.
526-
527-
Returns
528-
-------
529-
Xt : array-like, shape = [n_samples, n_transformed_features]
530-
"""
531-
# _final_estimator is None or has transform, otherwise attribute error
532-
if self._final_estimator != "passthrough":
533-
self._final_estimator.transform
534-
return self._transform
535-
536-
def _transform(self, X):
537-
Xt = X
538-
for _, _, transform in self._iter():
539-
if hasattr(transform, "fit_resample"):
540-
pass
541-
else:
542-
Xt = transform.transform(Xt)
543-
return Xt
544-
545-
@property
546-
def inverse_transform(self):
547-
"""Apply inverse transformations in reverse order
548-
549-
All estimators in the pipeline must support ``inverse_transform``.
550-
551-
Parameters
552-
----------
553-
Xt : array-like, shape = [n_samples, n_transformed_features]
554-
Data samples, where ``n_samples`` is the number of samples and
555-
``n_features`` is the number of features. Must fulfill
556-
input requirements of last step of pipeline's
557-
``inverse_transform`` method.
558-
559-
Returns
560-
-------
561-
Xt : array-like, shape = [n_samples, n_features]
562-
"""
563-
# raise AttributeError if necessary for hasattr behaviour
564-
for _, _, transform in self._iter():
565-
transform.inverse_transform
566-
return self._inverse_transform
567-
568-
def _inverse_transform(self, X):
569-
Xt = X
570-
reverse_iter = reversed(list(self._iter()))
571-
for _, _, transform in reverse_iter:
572-
if hasattr(transform, "fit_resample"):
573-
pass
574-
else:
575-
Xt = transform.inverse_transform(Xt)
576-
return Xt
577-
578-
@if_delegate_has_method(delegate="_final_estimator")
579-
def score(self, X, y=None, sample_weight=None):
580-
"""Apply transformers/samplers, and score with the final estimator
581-
582-
Parameters
583-
----------
584-
X : iterable
585-
Data to predict on. Must fulfill input requirements of first step
586-
of the pipeline.
587-
588-
y : iterable, default=None
589-
Targets used for scoring. Must fulfill label requirements for all
590-
steps of the pipeline.
591-
592-
sample_weight : array-like, default=None
593-
If not None, this argument is passed as ``sample_weight`` keyword
594-
argument to the ``score`` method of the final estimator.
595-
596-
Returns
597-
-------
598-
score : float
599-
"""
600-
Xt = X
601-
for _, _, transform in self._iter(with_final=False):
602-
if hasattr(transform, "fit_resample"):
603-
pass
604-
else:
605-
Xt = transform.transform(Xt)
606-
score_params = {}
607-
if sample_weight is not None:
608-
score_params["sample_weight"] = sample_weight
609-
return self.steps[-1][-1].score(Xt, y, **score_params)
610-
611-
@if_delegate_has_method(delegate='_final_estimator')
612-
def score_samples(self, X):
613-
"""Apply transforms, and score_samples of the final estimator.
614-
Parameters
615-
----------
616-
X : iterable
617-
Data to predict on. Must fulfill input requirements of first step
618-
of the pipeline.
619-
Returns
620-
-------
621-
y_score : ndarray, shape (n_samples,)
622-
"""
623-
Xt = X
624-
for _, _, transformer in self._iter(with_final=False):
625-
if hasattr(transformer, "fit_resample"):
626-
pass
627-
else:
628-
Xt = transformer.transform(Xt)
629-
return self.steps[-1][-1].score_samples(Xt)
630-
631-
632-
def _fit_transform_one(transformer,
633-
X,
634-
y,
635-
weight,
636-
message_clsname='',
637-
message=None,
638-
**fit_params):
639-
with _print_elapsed_time(message_clsname, message):
640-
if hasattr(transformer, "fit_transform"):
641-
res = transformer.fit_transform(X, y, **fit_params)
642-
else:
643-
res = transformer.fit(X, y, **fit_params).transform(X)
644-
# if we have a weight for this transformer, multiply output
645-
if weight is None:
646-
return res, transformer
647-
return res * weight, transformer
648-
649406

650407
def _fit_resample_one(sampler,
651408
X,

0 commit comments

Comments (0)