@@ -550,7 +550,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
550
550
y : array-like of shape (n_samples,)
551
551
Target values.
552
552
553
- classes : array-like of shape (n_classes), default=None
553
+ classes : array-like of shape (n_classes,), default=None
554
554
List of all the classes that can possibly appear in the y vector.
555
555
556
556
Must be provided at the first call to partial_fit, can be omitted
@@ -569,16 +569,19 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
569
569
if _check_partial_fit_first_call (self , classes ):
570
570
# This is the first call to partial_fit:
571
571
# initialize various cumulative counters
572
- n_effective_classes = len (classes ) if len ( classes ) > 1 else 2
573
- self ._init_counters (n_effective_classes , n_features )
572
+ n_classes = len (classes )
573
+ self ._init_counters (n_classes , n_features )
574
574
self .n_features_ = n_features
575
575
elif n_features != self .n_features_ :
576
576
msg = "Number of features %d does not match previous data %d."
577
577
raise ValueError (msg % (n_features , self .n_features_ ))
578
578
579
579
Y = label_binarize (y , classes = self .classes_ )
580
580
if Y .shape [1 ] == 1 :
581
- Y = np .concatenate ((1 - Y , Y ), axis = 1 )
581
+ if len (self .classes_ ) == 2 :
582
+ Y = np .concatenate ((1 - Y , Y ), axis = 1 )
583
+ else : # degenerate case: just one class
584
+ Y = np .ones_like (Y )
582
585
583
586
if X .shape [0 ] != Y .shape [0 ]:
584
587
msg = "X.shape[0]=%d and y.shape[0]=%d are incompatible."
@@ -634,7 +637,10 @@ def fit(self, X, y, sample_weight=None):
634
637
Y = labelbin .fit_transform (y )
635
638
self .classes_ = labelbin .classes_
636
639
if Y .shape [1 ] == 1 :
637
- Y = np .concatenate ((1 - Y , Y ), axis = 1 )
640
+ if len (self .classes_ ) == 2 :
641
+ Y = np .concatenate ((1 - Y , Y ), axis = 1 )
642
+ else : # degenerate case: just one class
643
+ Y = np .ones_like (Y )
638
644
639
645
# LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.
640
646
# We convert it to np.float64 to support sample_weight consistently;
@@ -649,18 +655,17 @@ def fit(self, X, y, sample_weight=None):
649
655
650
656
# Count raw events from data before updating the class log prior
651
657
# and feature log probas
652
- n_effective_classes = Y .shape [1 ]
653
-
654
- self ._init_counters (n_effective_classes , n_features )
658
+ n_classes = Y .shape [1 ]
659
+ self ._init_counters (n_classes , n_features )
655
660
self ._count (X , Y )
656
661
alpha = self ._check_alpha ()
657
662
self ._update_feature_log_prob (alpha )
658
663
self ._update_class_log_prior (class_prior = class_prior )
659
664
return self
660
665
661
- def _init_counters (self , n_effective_classes , n_features ):
662
- self .class_count_ = np .zeros (n_effective_classes , dtype = np .float64 )
663
- self .feature_count_ = np .zeros ((n_effective_classes , n_features ),
666
+ def _init_counters (self , n_classes , n_features ):
667
+ self .class_count_ = np .zeros (n_classes , dtype = np .float64 )
668
+ self .feature_count_ = np .zeros ((n_classes , n_features ),
664
669
dtype = np .float64 )
665
670
666
671
# mypy error: Decorated property not supported
@@ -714,7 +719,7 @@ class MultinomialNB(_BaseDiscreteNB):
714
719
Number of samples encountered for each class during fitting. This
715
720
value is weighted by the sample weight when provided.
716
721
717
- class_log_prior_ : ndarray of shape (n_classes, )
722
+ class_log_prior_ : ndarray of shape (n_classes,)
718
723
Smoothed empirical log probability for each class.
719
724
720
725
classes_ : ndarray of shape (n_classes,)
@@ -962,11 +967,11 @@ class BernoulliNB(_BaseDiscreteNB):
962
967
963
968
Attributes
964
969
----------
965
- class_count_ : ndarray of shape (n_classes)
970
+ class_count_ : ndarray of shape (n_classes,)
966
971
Number of samples encountered for each class during fitting. This
967
972
value is weighted by the sample weight when provided.
968
973
969
- class_log_prior_ : ndarray of shape (n_classes)
974
+ class_log_prior_ : ndarray of shape (n_classes,)
970
975
Log probability of each class (smoothed).
971
976
972
977
classes_ : ndarray of shape (n_classes,)
@@ -1053,8 +1058,8 @@ def _update_feature_log_prob(self, alpha):
1053
1058
1054
1059
def _joint_log_likelihood (self , X ):
1055
1060
"""Calculate the posterior log probability of the samples X"""
1056
- n_classes , n_features = self .feature_log_prob_ .shape
1057
- n_samples , n_features_X = X .shape
1061
+ n_features = self .feature_log_prob_ .shape [ 1 ]
1062
+ n_features_X = X .shape [ 1 ]
1058
1063
1059
1064
if n_features_X != n_features :
1060
1065
raise ValueError ("Expected input with %d features, got %d instead"
@@ -1173,7 +1178,7 @@ def fit(self, X, y, sample_weight=None):
1173
1178
y : array-like of shape (n_samples,)
1174
1179
Target values.
1175
1180
1176
- sample_weight : array-like of shape (n_samples), default=None
1181
+ sample_weight : array-like of shape (n_samples,), default=None
1177
1182
Weights applied to individual samples (1. for unweighted).
1178
1183
1179
1184
Returns
@@ -1207,16 +1212,16 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
1207
1212
total number of categories for the given feature. This can, for
1208
1213
instance, be achieved with the help of OrdinalEncoder.
1209
1214
1210
- y : array-like of shape (n_samples)
1215
+ y : array-like of shape (n_samples,)
1211
1216
Target values.
1212
1217
1213
- classes : array-like of shape (n_classes), default=None
1218
+ classes : array-like of shape (n_classes,), default=None
1214
1219
List of all the classes that can possibly appear in the y vector.
1215
1220
1216
1221
Must be provided at the first call to partial_fit, can be omitted
1217
1222
in subsequent calls.
1218
1223
1219
- sample_weight : array-like of shape (n_samples), default=None
1224
+ sample_weight : array-like of shape (n_samples,), default=None
1220
1225
Weights applied to individual samples (1. for unweighted).
1221
1226
1222
1227
Returns
@@ -1241,9 +1246,9 @@ def _check_X_y(self, X, y):
1241
1246
check_non_negative (X , "CategoricalNB (input X)" )
1242
1247
return X , y
1243
1248
1244
- def _init_counters (self , n_effective_classes , n_features ):
1245
- self .class_count_ = np .zeros (n_effective_classes , dtype = np .float64 )
1246
- self .category_count_ = [np .zeros ((n_effective_classes , 0 ))
1249
+ def _init_counters (self , n_classes , n_features ):
1250
+ self .class_count_ = np .zeros (n_classes , dtype = np .float64 )
1251
+ self .category_count_ = [np .zeros ((n_classes , 0 ))
1247
1252
for _ in range (n_features )]
1248
1253
1249
1254
@staticmethod
0 commit comments