23
23
from sklearn .externals .joblib import Parallel , delayed
24
24
25
25
26
- def _calc_score (selector , X , y , indices , ** fit_params ):
26
+ def _calc_score (selector , X , y , indices , groups = None , ** fit_params ):
27
27
if selector .cv :
28
28
scores = cross_val_score (selector .est_ ,
29
29
X [:, indices ], y ,
30
+ groups = groups ,
30
31
cv = selector .cv ,
31
32
scoring = selector .scorer ,
32
33
n_jobs = 1 ,
@@ -242,7 +243,7 @@ def set_params(self, **params):
242
243
self ._set_params ('estimator' , 'named_estimators' , ** params )
243
244
return self
244
245
245
- def fit (self , X , y , custom_feature_names = None , ** fit_params ):
246
+ def fit (self , X , y , custom_feature_names = None , groups = None , ** fit_params ):
246
247
"""Perform feature selection and learn model from training data.
247
248
248
249
Parameters
@@ -260,6 +261,9 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
260
261
Custom feature names for `self.k_feature_names` and
261
262
`self.subsets_[i]['feature_names']`.
262
263
(new in v 0.13.0)
264
+ groups : array-like, with shape (n_samples,), optional
265
+ Group labels for the samples used while splitting the dataset into
266
+ train/test set. Passed to the fit method of the cross-validator.
263
267
fit_params : dict of string -> object, optional
264
268
Parameters to pass to the fit method of classifier .
265
269
@@ -291,8 +295,8 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
291
295
if not isinstance (self .k_features , int ) and \
292
296
not isinstance (self .k_features , tuple )\
293
297
and not isinstance (self .k_features , str ):
294
- raise AttributeError ('k_features must be a positive integer'
295
- ', tuple, or string' )
298
+ raise AttributeError ('k_features must be a positive integer'
299
+ ', tuple, or string' )
296
300
297
301
if (isinstance (self .k_features , int ) and (
298
302
self .k_features < 1 or self .k_features > X_ .shape [1 ])):
@@ -351,7 +355,8 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
351
355
k_to_select = min_k
352
356
k_idx = tuple (range (X_ .shape [1 ]))
353
357
k = len (k_idx )
354
- k_idx , k_score = _calc_score (self , X_ , y , k_idx , ** fit_params )
358
+ k_idx , k_score = _calc_score (self , X_ , y , k_idx ,
359
+ groups = groups , ** fit_params )
355
360
self .subsets_ [k ] = {
356
361
'feature_idx' : k_idx ,
357
362
'cv_scores' : k_score ,
@@ -370,6 +375,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
370
375
subset = prev_subset ,
371
376
X = X_ ,
372
377
y = y ,
378
+ groups = groups ,
373
379
** fit_params
374
380
)
375
381
else :
@@ -378,6 +384,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
378
384
feature_set = prev_subset ,
379
385
X = X_ ,
380
386
y = y ,
387
+ groups = groups ,
381
388
** fit_params
382
389
)
383
390
@@ -404,6 +411,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
404
411
fixed_feature = new_feature ,
405
412
X = X_ ,
406
413
y = y ,
414
+ groups = groups ,
407
415
** fit_params
408
416
)
409
417
@@ -413,6 +421,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
413
421
subset = set (k_idx ),
414
422
X = X_ ,
415
423
y = y ,
424
+ groups = groups ,
416
425
** fit_params
417
426
)
418
427
@@ -472,7 +481,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
472
481
X )
473
482
raise KeyboardInterrupt
474
483
475
- except KeyboardInterrupt as e :
484
+ except KeyboardInterrupt :
476
485
self .interrupted_ = True
477
486
sys .stderr .write ('\n STOPPING EARLY DUE TO KEYBOARD INTERRUPT...' )
478
487
@@ -512,7 +521,7 @@ def fit(self, X, y, custom_feature_names=None, **fit_params):
512
521
return self
513
522
514
523
def _inclusion (self , orig_set , subset , X , y , ignore_feature = None ,
515
- ** fit_params ):
524
+ groups = None , ** fit_params ):
516
525
all_avg_scores = []
517
526
all_cv_scores = []
518
527
all_subsets = []
@@ -526,7 +535,7 @@ def _inclusion(self, orig_set, subset, X, y, ignore_feature=None,
526
535
work = parallel (delayed (_calc_score )
527
536
(self , X , y ,
528
537
tuple (subset | {feature }),
529
- ** fit_params )
538
+ groups = groups , ** fit_params )
530
539
for feature in remaining
531
540
if feature != ignore_feature )
532
541
@@ -541,7 +550,8 @@ def _inclusion(self, orig_set, subset, X, y, ignore_feature=None,
541
550
all_cv_scores [best ])
542
551
return res
543
552
544
- def _exclusion (self , feature_set , X , y , fixed_feature = None , ** fit_params ):
553
+ def _exclusion (self , feature_set , X , y , fixed_feature = None ,
554
+ groups = None , ** fit_params ):
545
555
n = len (feature_set )
546
556
res = (None , None , None )
547
557
if n > 1 :
@@ -552,7 +562,8 @@ def _exclusion(self, feature_set, X, y, fixed_feature=None, **fit_params):
552
562
n_jobs = min (self .n_jobs , features )
553
563
parallel = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
554
564
pre_dispatch = self .pre_dispatch )
555
- work = parallel (delayed (_calc_score )(self , X , y , p , ** fit_params )
565
+ work = parallel (delayed (_calc_score )(self , X , y , p ,
566
+ groups = groups , ** fit_params )
556
567
for p in combinations (feature_set , r = n - 1 )
557
568
if not fixed_feature or fixed_feature in set (p ))
558
569
@@ -591,7 +602,7 @@ def transform(self, X):
591
602
X_ = X
592
603
return X_ [:, self .k_feature_idx_ ]
593
604
594
- def fit_transform (self , X , y , ** fit_params ):
605
+ def fit_transform (self , X , y , groups = None , ** fit_params ):
595
606
"""Fit to training data then reduce X to its most important features.
596
607
597
608
Parameters
@@ -605,6 +616,9 @@ def fit_transform(self, X, y, **fit_params):
605
616
Target values.
606
617
New in v 0.13.0: a pandas Series is now also accepted as
607
618
argument for y.
619
+ groups : array-like, with shape (n_samples,), optional
620
+ Group labels for the samples used while splitting the dataset into
621
+ train/test set. Passed to the fit method of the cross-validator.
608
622
fit_params : dict of string -> object, optional
609
623
Parameters to pass to the fit method of classifier .
610
624
@@ -613,7 +627,7 @@ def fit_transform(self, X, y, **fit_params):
613
627
Reduced feature subset of X, shape={n_samples, k_features}
614
628
615
629
"""
616
- self .fit (X , y , ** fit_params )
630
+ self .fit (X , y , groups = groups , ** fit_params )
617
631
return self .transform (X )
618
632
619
633
def get_metric_dict (self , confidence_interval = 0.95 ):
0 commit comments