ValueError: fill value must be in categories #422

IFV · 2019-01-20T17:15:40Z

Hello, everyone!

I am getting the error below with auto_ml version 2.9.10 when trying to train a model by following this guide (https://auto-ml.readthedocs.io/en/latest/analytics.html):

**---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-106-c3c8faf1013e> in <module>()
      1 ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
      2 
----> 3 ml_predictor.train(train_subset)
      4 
      5 # Score the model on test data

~\Anaconda3\lib\site-packages\auto_ml\predictor.py in train(***failed resolving arguments***)
    650                         estimator_names = self._get_estimator_names()
    651 
--> 652                     X_df = self.fit_transformation_pipeline(X_df, y, estimator_names)
    653             else:
    654                 X_df = self.transformation_pipeline.transform(X_df)

~\Anaconda3\lib\site-packages\auto_ml\predictor.py in fit_transformation_pipeline(self, X_df, y, model_names)
    901 
    902         # We are intentionally overwriting X_df here to try to save some memory space
--> 903         X_df = ppl.fit_transform(X_df, y)
    904 
    905         self.transformation_pipeline = self._consolidate_pipeline(ppl)

~\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
    281         Xt, fit_params = self._fit(X, y, **fit_params)
    282         if hasattr(last_step, 'fit_transform'):
--> 283             return last_step.fit_transform(Xt, y, **fit_params)
    284         elif last_step is None:
    285             return Xt

~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
    518         else:
    519             # fit method of arity 2 (supervised transformation)
--> 520             return self.fit(X, y, **fit_params).transform(X)
    521 
    522 

~\Anaconda3\lib\site-packages\auto_ml\DataFrameVectorizer.py in transform(self, X, y)
    269 
    270     def transform(self, X, y=None):
--> 271         return self._transform(X)
    272 
    273     def get_feature_names(self):

~\Anaconda3\lib\site-packages\auto_ml\DataFrameVectorizer.py in _transform(self, X)
    177                     X[col] = 0
    178 
--> 179             X.fillna(0, inplace=True)
    180 
    181             for idx, col in enumerate(self.numerical_columns):

~\Anaconda3\lib\site-packages\pandas\core\frame.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
   3788                      self).fillna(value=value, method=method, axis=axis,
   3789                                   inplace=inplace, limit=limit,
-> 3790                                   downcast=downcast, **kwargs)
   3791 
   3792     @Appender(_shared_docs['replace'] % _shared_doc_kwargs)

~\Anaconda3\lib\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   5425                 new_data = self._data.fillna(value=value, limit=limit,
   5426                                              inplace=inplace,
-> 5427                                              downcast=downcast)
   5428             elif isinstance(value, DataFrame) and self.ndim == 2:
   5429                 new_data = self.where(self.notna(), value)

~\Anaconda3\lib\site-packages\pandas\core\internals.py in fillna(self, **kwargs)
   3706 
   3707     def fillna(self, **kwargs):
-> 3708         return self.apply('fillna', **kwargs)
   3709 
   3710     def downcast(self, **kwargs):

~\Anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
   3579 
   3580             kwargs['mgr'] = self
-> 3581             applied = getattr(b, f)(**kwargs)
   3582             result_blocks = _extend_blocks(applied, result_blocks)
   3583 

~\Anaconda3\lib\site-packages\pandas\core\internals.py in fillna(self, value, limit, inplace, downcast, mgr)
   2004                mgr=None):
   2005         values = self.values if inplace else self.values.copy()
-> 2006         values = values.fillna(value=value, limit=limit)
   2007         return [self.make_block_same_class(values=values,
   2008                                            placement=self.mgr_locs,

~\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
    176                 else:
    177                     kwargs[new_arg_name] = new_arg_value
--> 178             return func(*args, **kwargs)
    179         return wrapper
    180     return _deprecate_kwarg

~\Anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in fillna(self, value, method, limit)
   1754             elif is_hashable(value):
   1755                 if not isna(value) and value not in self.categories:
-> 1756                     raise ValueError("fill value must be in categories")
   1757 
   1758                 mask = values == -1

ValueError: fill value must be in categories

**

Am I missing any pre-processing step?

column_descriptions = {
'F11': 'output',
'F0': 'categorical',
'F2': 'categorical'}

Here is part of the dataset (it contains no missing values):

Feat0	Feat1	Feat2	Feat3	Feat4	Feat5	Feat6	Feat7	Feat8	Feat9	Feat10	Feat11	Feat12	Feat13	Feat14	Feat15
1	1	1	557	557	736	720	5068	99	83	51	16	0	209	5	43
1	1	1	713	715	912	858	4069	59	43	24	14	-2	122	4	31
1	1	1	723	720	945	944	290	142	144	126	1	3	762	5	11
1	1	1	757	755	843	840	3038	46	45	20	3	2	89	6	20
1	1	1	822	805	1143	1123	287	141	138	112	20	17	936	4	25
1	1	1	1047	1045	1151	1155	570	64	70	53	-4	2	325	5	6
1	1	1	1246	1250	2030	2020	244	284	270	260	10	-4	2161	6	18
1	1	1	1323	1317	1644	1637	544	141	140	114	7	6	868	6	21
1	1	1	1405	1320	1528	1435	3604	83	75	57	53	45	281	13	13
1	1	1	1406	1406	1516	1521	43	70	75	60	-5	0	399	2	8
1	1	1	1510	1510	1607	1609	958	57	59	36	-2	0	193	6	15
1	1	1	1512	1435	1806	1732	2042	114	117	98	34	37	700	6	10
1	1	1	1656	1655	1944	1932	1771	228	217	206	12	1	1324	6	16
1	1	1	1805	1805	1955	1955	678	50	50	30	0	0	162	8	12
1	1	1	1857	1855	2121	2140	1143	144	165	126	-19	2	861	4	14
1	1	1	1915	1658	1949	1725	2476	94	87	72	144	137	369	3	19
1	2	2	608	610	726	714	110	78	64	52	12	-2	258	4	22
1	2	2	610	615	714	739	419	124	144	107	-25	-5	737	4	13
1	2	2	617	620	734	751	2451	77	91	64	-17	-3	451	4	9
1	2	2	620	610	723	717	17	63	67	46	6	10	196	2	15
1	2	2	646	630	814	750	981	88	80	75	24	16	405	6	7
1	2	2	653	651	845	833	1507	172	162	149	12	2	929	11	12
1	2	2	657	700	1145	1158	616	168	178	146	-13	-3	1175	9	13
1	2	2	713	645	853	826	524	100	101	72	27	28	454	7	21
1	2	2	714	715	913	915	1295	119	120	99	-2	-1	631	5	15
1	2	2	953	952	1034	1042	1763	101	110	82	-8	1	589	10	9
1	2	2	1111	1115	1556	1600	1480	225	225	186	-4	-4	1520	11	28
1	2	2	1156	1200	1232	1233	786	36	33	19	-1	-4	55	8	9
1	2	2	1210	1205	1414	1340	5192	124	95	75	34	5	264	17	32
1	2	2	1231	1232	1227	1233	4064	56	61	30	-6	-1	157	21	5
1	2	2	1233	1236	1338	1337	1978	65	61	35	1	-3	190	22	8
1	2	2	1310	1305	1405	1405	34	55	60	44	0	5	276	2	9
1	2	2	1441	1420	1547	1529	707	66	69	46	18	21	237	3	17
1	2	2	1447	1450	1719	1717	2577	152	147	111	2	-3	798	27	14
1	2	2	1457	1500	1604	1616	6866	67	76	40	-12	-3	214	7	20
1	2	2	1500	1500	1650	1743	143	290	343	268	-53	0	2288	7	15
1	2	2	1545	1530	1646	1648	2445	181	198	164	-2	15	1235	6	11
1	2	2	1643	1645	1935	1929	866	112	104	94	6	-2	599	6	12
1	2	2	1745	1700	1925	1831	653	100	91	83	54	45	432	5	12
1	2	2	1747	1748	1817	1811	526	30	23	11	6	-1	32	3	16
1	2	2	1756	1740	2118	2100	1108	142	140	122	18	16	861	8	12
1	2	2	1820	1744	1945	1901	3511	85	77	60	44	36	228	2	23
1	2	2	1844	1845	2100	2055	814	136	130	109	5	-1	786	10	17
1	2	2	1924	1920	2035	2028	758	71	68	35	7	4	193	4	32
1	2	2	1955	1825	2121	1953	1402	86	88	69	88	90	430	3	14
1	2	2	2030	2010	2145	2128	5283	75	78	46	17	20	178	5	24
1	2	2	2155	2147	114	101	70	139	134	117	13	8	872	6	16
1	3	3	305	200	1024	922	936	259	262	237	62	65	1979	6	16
1	3	3	615	615	726	717	1366	71	62	49	9	0	247	9	13

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ValueError: fill value must be in categories #422

ValueError: fill value must be in categories #422

IFV commented Jan 20, 2019

ValueError: fill value must be in categories #422

ValueError: fill value must be in categories #422

Comments

IFV commented Jan 20, 2019