Skip to content

Commit d1e8fae

Browse files
authored
Merge pull request #165 from scikit-learn-contrib/AssafBenDavid-master
Better treatment of unicode feature names in `get_names`
2 parents c5e4ae1 + 68d6b4c commit d1e8fae

File tree

3 files changed

+13
-1
lines changed

3 files changed

+13
-1
lines changed

README.rst

+2
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ Changelog
411411

412412
Development
413413
******************
414+
* Fix issues with unicode names in ``get_names`` (#160).
414415
* Update to build using ``numpy==1.14`` and ``python==3.6`` (#154).
415416
* Add ``strategy`` and ``replacement`` parameters to ``CategoricalImputer`` to allow imputing
416417
with values other than the mode (#144).
@@ -495,6 +496,7 @@ Other contributors:
495496

496497
* Ariel Rossanigo (@arielrossanigo)
497498
* Arnau Gil Amat (@arnau126)
499+
* Assaf Ben-David (@AssafBenDavid)
498500
* Cal Paterson (@calpaterson)
499501
* @defvorfu
500502
* Gustavo Sena Mafra (@gsmafra)

sklearn_pandas/dataframe_mapper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def get_names(self, columns, transformer, x, alias=None):
253253
else:
254254
names = _get_feature_names(transformer)
255255
if names is not None and len(names) == num_cols:
256-
return [name + '_' + str(o) for o in names]
256+
return ['%s_%s' % (name, o) for o in names]
257257
# otherwise, return name concatenated with '_0', '_1', etc.
258258
else:
259259
return [name + '_' + str(o) for o in range(num_cols)]

tests/test_dataframe_mapper.py

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf8 -*-
2+
13
import pytest
24
from pkg_resources import parse_version
35

@@ -128,6 +130,14 @@ def test_transformed_names_binarizer(complex_dataframe):
128130
assert mapper.transformed_names_ == ['target_a', 'target_b', 'target_c']
129131

130132

133+
def test_transformed_names_binarizer_unicode():
134+
df = pd.DataFrame({'target': [u'ñ', u'á', u'é']})
135+
mapper = DataFrameMapper([('target', LabelBinarizer())])
136+
mapper.fit_transform(df)
137+
expected_names = {u'target_ñ', u'target_á', u'target_é'}
138+
assert set(mapper.transformed_names_) == expected_names
139+
140+
131141
def test_transformed_names_transformers_list(complex_dataframe):
132142
"""
133143
When using a list of transformers, use them in inverse order to get the

0 commit comments

Comments
 (0)