Skip to content

Commit 6c28f1e

Browse files
committed
Alters feature 'combining' behavior, see issue scikit-learn-contrib#51.
1 parent d3fb586 commit 6c28f1e

File tree

1 file changed

+12
-9
lines changed

1 file changed

+12
-9
lines changed

sklearn_pandas/dataframe_mapper.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ def _build_transformer(transformers):
2727
return transformers
2828

2929

30+
def _sparse_to_dense(extracted):
31+
return [x.toarray() if sparse.issparse(x) else x for x in extracted]
32+
33+
3034
class DataFrameMapper(BaseEstimator, TransformerMixin):
3135
"""
3236
Map Pandas data frame column subsets to their own
@@ -120,13 +124,12 @@ def transform(self, X):
120124

121125
# If the mapper was initialized with sparse=True, combine into a sparse (CSR) matrix.
122126
# Otherwise, densify any sparse features and combine as a normal array.
123-
if any(sparse.issparse(fea) for fea in extracted):
124-
stacked = sparse.hstack(extracted).tocsr()
125-
# return a sparse matrix only if the mapper was initialized
126-
# with sparse=True
127-
if not self.sparse:
128-
stacked = stacked.toarray()
127+
if self.sparse:
128+
if any(sparse.issparse(fea) for fea in extracted):
129+
# NOTE: sparse.hstack fails if any array in `extracted` has dtype=object
130+
return sparse.hstack(extracted).tocsr()
131+
else:
132+
# no extracted feature is sparse: stack densely, then convert to sparse (CSR)
133+
return sparse.csr_matrix(np.hstack(extracted))
129134
else:
130-
stacked = np.hstack(extracted)
131-
132-
return stacked
135+
return np.hstack(_sparse_to_dense(extracted))

0 commit comments

Comments
 (0)