Skip to content

Commit aa070c7

Browse files
committed
Add a test with unicode data (to check Python 3)
1 parent 0f3b606 commit aa070c7

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

tests/test_data/cars.csv.gz

68.5 KB
Binary file not shown.

tests/test_dataframe_mapper.py

+20
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import pytest
22

33
from pandas import DataFrame
4+
import pandas as pd
45
from sklearn.datasets import load_iris
56
from sklearn.pipeline import Pipeline
67
from sklearn.svm import SVC
8+
from sklearn.feature_extraction.text import CountVectorizer
79
import numpy as np
810

911
from sklearn_pandas import (
@@ -27,6 +29,11 @@ def iris_dataframe():
2729
)
2830

2931

32+
@pytest.fixture
def cars_dataframe():
    """Load the gzip-compressed cars CSV used by the text-feature test.

    The path is resolved relative to this test module instead of the
    current working directory, so the fixture works regardless of the
    directory pytest is launched from.

    Returns:
        pandas.DataFrame: the parsed contents of ``test_data/cars.csv.gz``.
    """
    import os

    data_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "test_data",
        "cars.csv.gz",
    )
    # State the compression explicitly rather than relying on pandas
    # inferring it from the ".gz" suffix.
    return pd.read_csv(data_path, compression="gzip")
35+
36+
3037
def test_with_iris_dataframe(iris_dataframe):
3138
pipeline = Pipeline([
3239
("preprocess", DataFrameMapper([
@@ -42,3 +49,16 @@ def test_with_iris_dataframe(iris_dataframe):
4249
scores = cross_val_score(pipeline, data, labels)
4350
assert scores.mean() > 0.96
4451
assert (scores.std() * 2) < 0.04
52+
53+
54+
def test_with_car_dataframe(cars_dataframe):
    """Cross-validate a text-classification pipeline on the cars data.

    The "description" column is vectorized with ``CountVectorizer`` through
    a ``DataFrameMapper`` and fed to a linear-kernel ``SVC`` that predicts
    the "model" column. The test passes when the mean cross-validation
    score is above 0.30.
    """
    # Target column and the remaining feature columns.
    labels = cars_dataframe["model"]
    data = cars_dataframe.drop("model", axis=1)

    # Build the two pipeline stages separately, then chain them.
    text_features = DataFrameMapper([
        ("description", CountVectorizer()),
    ])
    classifier = SVC(kernel='linear')
    pipeline = Pipeline([
        ("preprocess", text_features),
        ("classify", classifier),
    ])

    scores = cross_val_score(pipeline, data, labels)
    assert scores.mean() > 0.30

0 commit comments

Comments
 (0)