# Christos Aridas
# Guillaume Lemaitre <[email protected]>
# License: BSD
-
from sklearn import pipeline
from sklearn.base import clone
-from sklearn.utils import Bunch, _print_elapsed_time
+from sklearn.utils import _print_elapsed_time
from sklearn.utils.metaestimators import if_delegate_has_method
from sklearn.utils.validation import check_memory

@@ -145,7 +144,8 @@ def _validate_steps(self):
            ):
                raise TypeError(
                    "All intermediate steps of the chain should "
-                    "be estimators that implement fit and transform or sample."
+                    "be estimators that implement fit and transform or "
+                    "fit_resample."
                    " '%s' implements both)" % (t)
                )

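For orientation, a hypothetical sketch (not part of the diff) of the kind of step this message is about; the class name is made up, and the triggering condition itself sits outside this hunk:

# Hypothetical step exposing both the transformer and the resampler
# interface; using it as an intermediate pipeline step is the situation the
# TypeError above appears to guard against ("'%s' implements both").
class AmbiguousStep:
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X

    def fit_resample(self, X, y):
        return X, y
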
@@ -167,6 +167,21 @@ def _validate_steps(self):
                % (estimator, type(estimator))
            )

+    def _iter(
+        self, with_final=True, filter_passthrough=True, filter_resample=True
+    ):
+        """Generate (idx, (name, trans)) tuples from self.steps.
+
+        When `filter_passthrough` is `True`, 'passthrough' and None
+        transformers are filtered out. When `filter_resample` is `True`,
+        estimators with a `fit_resample` method are filtered out.
+        """
+        it = super()._iter(with_final, filter_passthrough)
+        if filter_resample:
+            return filter(lambda x: not hasattr(x[-1], "fit_resample"), it)
+        else:
+            return it
+
    # Estimator interface

    def _fit(self, X, y=None, **fit_params):
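A minimal sketch (not part of the diff) of how the `filter_resample` flag added above is expected to behave, assuming this module is what ships as `imblearn.pipeline`; the step names and estimators are arbitrary:

from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([
    ("under", RandomUnderSampler()),   # exposes fit_resample
    ("scale", StandardScaler()),       # exposes fit/transform
    ("clf", LogisticRegression()),
])

# Default iteration skips steps exposing `fit_resample`:
[name for _, name, _ in pipe._iter(with_final=False)]
# -> ["scale"]

# With filter_resample=False (as used in _fit below), samplers are visited too:
[name for _, name, _ in pipe._iter(with_final=False, filter_resample=False)]
# -> ["under", "scale"]
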
@@ -175,7 +190,7 @@ def _fit(self, X, y=None, **fit_params):
        # Setup the memory
        memory = check_memory(self.memory)

-        fit_transform_one_cached = memory.cache(_fit_transform_one)
+        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
        fit_resample_one_cached = memory.cache(_fit_resample_one)

        fit_params_steps = {
@@ -194,7 +209,8 @@ def _fit(self, X, y=None, **fit_params):
        for (step_idx,
             name,
             transformer) in self._iter(with_final=False,
-                                        filter_passthrough=False):
+                                        filter_passthrough=False,
+                                        filter_resample=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
@@ -208,7 +224,7 @@ def _fit(self, X, y=None, **fit_params):
                else:
                    cloned_transformer = clone(transformer)
            elif hasattr(memory, "cachedir"):
-                # joblib < 0.11
+                # joblib <= 0.11
                if memory.cachedir is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
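For context, an illustrative sketch (not part of the diff) of how the `memory` validated above is usually supplied by callers, so that the cached `_fit_transform_one` / `_fit_resample_one` wrappers are reused across fits; the cache directory and estimator choices are arbitrary:

from tempfile import mkdtemp

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

cache_dir = mkdtemp()  # any writable directory (or a joblib.Memory object)
pipe = Pipeline(
    [
        ("scale", StandardScaler()),
        ("smote", SMOTE()),
        ("clf", LogisticRegression()),
    ],
    memory=cache_dir,  # ends up in self.memory, validated via check_memory
)
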
@@ -354,38 +370,6 @@ def fit_resample(self, X, y=None, **fit_params):
        elif hasattr(last_step, "fit_resample"):
            return last_step.fit_resample(Xt, yt, **fit_params)

-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict(self, X, **predict_params):
-        """Apply transformers/samplers to the data, and predict with the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        **predict_params : dict of string -> object
-            Parameters to the ``predict`` called at the end of all
-            transformations in the pipeline. Note that while this may be
-            used to return uncertainties from some models with return_std
-            or return_cov, uncertainties that are generated by the
-            transformations in the pipeline are not propagated to the
-            final estimator.
-
-        Returns
-        -------
-        y_pred : array-like
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict(Xt, **predict_params)
-
    @if_delegate_has_method(delegate="_final_estimator")
    def fit_predict(self, X, y=None, **fit_params):
        """Applies fit_predict of last step in pipeline after transforms.
@@ -419,233 +403,6 @@ def fit_predict(self, X, y=None, **fit_params):
        y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
        return y_pred

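A brief usage sketch (not part of the diff) of the behaviour these prediction-time methods encode: steps with `fit_resample` act while fitting but are skipped when predicting, which is what the `pass` branches above and below do; dataset and estimator choices are arbitrary:

from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(weights=[0.9, 0.1], random_state=0)

pipe = Pipeline([
    ("under", RandomUnderSampler(random_state=0)),
    ("clf", LogisticRegression()),
])
pipe.fit(X, y)            # "under" resamples X, y before "clf" is fitted
y_pred = pipe.predict(X)  # "under" is skipped; X goes straight to "clf"
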
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_proba(self, X):
-        """Apply transformers/samplers, and predict_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_proba : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_proba(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def decision_function(self, X):
-        """Apply transformers/samplers, and decision_function of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].decision_function(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_log_proba(self, X):
-        """Apply transformers/samplers, and predict_log_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_log_proba(Xt)
-
-    @property
-    def transform(self):
-        """Apply transformers/samplers, and transform with the final estimator
-
-        This also works where final estimator is ``None``: all prior
-        transformations are applied.
-
-        Parameters
-        ----------
-        X : iterable
-            Data to transform. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-        """
-        # _final_estimator is None or has transform, otherwise attribute error
-        if self._final_estimator != "passthrough":
-            self._final_estimator.transform
-        return self._transform
-
-    def _transform(self, X):
-        Xt = X
-        for _, _, transform in self._iter():
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return Xt
-
-    @property
-    def inverse_transform(self):
-        """Apply inverse transformations in reverse order
-
-        All estimators in the pipeline must support ``inverse_transform``.
-
-        Parameters
-        ----------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-            Data samples, where ``n_samples`` is the number of samples and
-            ``n_features`` is the number of features. Must fulfill
-            input requirements of last step of pipeline's
-            ``inverse_transform`` method.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_features]
-        """
-        # raise AttributeError if necessary for hasattr behaviour
-        for _, _, transform in self._iter():
-            transform.inverse_transform
-        return self._inverse_transform
-
-    def _inverse_transform(self, X):
-        Xt = X
-        reverse_iter = reversed(list(self._iter()))
-        for _, _, transform in reverse_iter:
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.inverse_transform(Xt)
-        return Xt
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score(self, X, y=None, sample_weight=None):
-        """Apply transformers/samplers, and score with the final estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        y : iterable, default=None
-            Targets used for scoring. Must fulfill label requirements for all
-            steps of the pipeline.
-
-        sample_weight : array-like, default=None
-            If not None, this argument is passed as ``sample_weight`` keyword
-            argument to the ``score`` method of the final estimator.
-
-        Returns
-        -------
-        score : float
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        score_params = {}
-        if sample_weight is not None:
-            score_params["sample_weight"] = sample_weight
-        return self.steps[-1][-1].score(Xt, y, **score_params)
-
-    @if_delegate_has_method(delegate='_final_estimator')
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-
-def _fit_transform_one(transformer,
-                       X,
-                       y,
-                       weight,
-                       message_clsname='',
-                       message=None,
-                       **fit_params):
-    with _print_elapsed_time(message_clsname, message):
-        if hasattr(transformer, "fit_transform"):
-            res = transformer.fit_transform(X, y, **fit_params)
-        else:
-            res = transformer.fit(X, y, **fit_params).transform(X)
-    # if we have a weight for this transformer, multiply output
-    if weight is None:
-        return res, transformer
-    return res * weight, transformer
-

def _fit_resample_one(sampler,
                      X,