1
1
import pytest
2
2
3
3
from pandas import DataFrame
4
+ import pandas as pd
4
5
from sklearn .datasets import load_iris
5
6
from sklearn .pipeline import Pipeline
6
7
from sklearn .svm import SVC
8
+ from sklearn .feature_extraction .text import CountVectorizer
7
9
import numpy as np
8
10
9
11
from sklearn_pandas import (
@@ -27,6 +29,11 @@ def iris_dataframe():
27
29
)
28
30
29
31
32
@pytest.fixture
def cars_dataframe():
    """Provide the cars test data set as a pandas DataFrame.

    The data is read from a gzip-compressed CSV file. The path is relative,
    so it is resolved against the current working directory of the test run.
    """
    csv_path = "tests/test_data/cars.csv.gz"
    return pd.read_csv(csv_path)
35
+
36
+
30
37
def test_with_iris_dataframe (iris_dataframe ):
31
38
pipeline = Pipeline ([
32
39
("preprocess" , DataFrameMapper ([
@@ -42,3 +49,16 @@ def test_with_iris_dataframe(iris_dataframe):
42
49
scores = cross_val_score (pipeline , data , labels )
43
50
assert scores .mean () > 0.96
44
51
assert (scores .std () * 2 ) < 0.04
52
+
53
+
54
def test_with_car_dataframe(cars_dataframe):
    """Cross-validate a text pipeline that predicts "model" from "description".

    The "description" column is vectorized with ``CountVectorizer`` through a
    ``DataFrameMapper`` and the result is fed to a linear-kernel ``SVC``. The
    test passes when the mean of the cross-validation scores exceeds 0.30.
    """
    # Only the free-text column is mapped into features.
    mapper = DataFrameMapper([
        ("description", CountVectorizer()),
    ])
    pipeline = Pipeline([
        ("preprocess", mapper),
        ("classify", SVC(kernel='linear')),
    ])

    # The target column is split off so it is not part of the input frame.
    labels = cars_dataframe["model"]
    data = cars_dataframe.drop("model", axis=1)

    scores = cross_val_score(pipeline, data, labels)
    assert scores.mean() > 0.30
0 commit comments