|
| 1 | +import pytest |
| 2 | + |
| 3 | +from pandas import DataFrame |
| 4 | +from sklearn.datasets import load_iris |
| 5 | +from sklearn.preprocessing import LabelBinarizer |
| 6 | +from sklearn.svm import SVC |
| 7 | +import numpy as np |
| 8 | + |
| 9 | +from sklearn_pandas import ( |
| 10 | + DataFrameMapper, |
| 11 | + PassthroughTransformer, |
| 12 | + cross_val_score, |
| 13 | +) |
| 14 | + |
@pytest.fixture
def iris_dataframe():
    """Return the iris dataset as a DataFrame.

    The frame has one column for each of the first four feature names
    reported by the dataset, plus a "species" column holding the target
    name looked up for every sample's target index.
    """
    iris = load_iris()
    # One column per measured feature, keyed by the dataset's feature name.
    columns = {
        feature: iris.data[:, position]
        for position, feature in enumerate(iris.feature_names[:4])
    }
    # Translate each numeric target into its human-readable class name.
    columns["species"] = np.array(
        [iris.target_names[label] for label in iris.target]
    )
    return DataFrame(data=columns)
| 27 | + |
def test_with_iris_dataframe(iris_dataframe):
    """Cross-validate a linear SVC on the iris frame and check its scores.

    The mean of the cross-validation scores must exceed 0.96, and twice
    their standard deviation must stay below 0.04.
    """
    # NOTE(review): the mapper is only constructed here; it is not passed to
    # cross_val_score below -- confirm whether it was meant to be wired in.
    pipeline = DataFrameMapper([
        ("petal length (cm)", PassthroughTransformer()),
        ("petal width (cm)", PassthroughTransformer()),
        ("sepal length (cm)", PassthroughTransformer()),
        ("sepal width (cm)", PassthroughTransformer()),
        ("species", LabelBinarizer()),
    ])
    data = iris_dataframe.drop("species", axis=1)
    labels = iris_dataframe["species"]
    clf = SVC(kernel='linear', C=1)
    scores = cross_val_score(clf, data, labels)
    # mean must be *called*: comparing the bound method itself to a float
    # raises TypeError on Python 3 and never inspects the actual scores.
    assert scores.mean() > 0.96
    assert (scores.std() * 2) < 0.04
0 commit comments