Skip to content

Commit 0314941

Browse files
committed
TST add minhash unit test
1 parent 6b89449 commit 0314941

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

tests/test_titanic.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import pytest
22
import numpy as np
33
from pathlib import Path
4-
from playtime import feats, onehot, bag_of_words
4+
from playtime import feats, onehot, bag_of_words, minhash
55
import pandas as pd
66
import polars as pl
77
from sklearn.pipeline import make_pipeline
@@ -25,6 +25,14 @@ def test_onehot(df):
2525
assert onehot("sex", "pclass").fit_transform(df).shape[1] == 5
2626

2727

28+
@pytest.mark.parametrize("df", [pd.read_csv(titanic_path), pl.read_csv(titanic_path)])
def test_minhash(df):
    """Check the output width of ``minhash`` on pandas and polars frames.

    The test is parametrized over the same CSV (``titanic_path``) read once
    with pandas and once with polars. It expects ``n_components`` output
    columns for every input column passed to ``minhash``.
    """
    # A single input column should produce exactly n_components columns.
    assert minhash("sex", n_components=10).fit_transform(df).shape[1] == 10
    assert minhash("name", n_components=10).fit_transform(df).shape[1] == 10
    # Two input columns: each is expected to contribute its own n_components.
    assert minhash("sex", "name", n_components=10).fit_transform(df).shape[1] == 20
34+
35+
2836
@pytest.mark.parametrize("df", [pd.read_csv(titanic_path), pl.read_csv(titanic_path)])
2937
def test_bow(df):
3038
assert bag_of_words("name").fit_transform(df).shape[1] > 10
@@ -58,3 +66,5 @@ def test_pipeline(df, feat_pipe):
5866
# Confirm that we can gridsearch too
5967
grid = GridSearchCV(full_pipe, {}, cv=2)
6068
assert grid.fit(df, y).predict(df).shape
69+
70+

0 commit comments

Comments
 (0)