-
Notifications
You must be signed in to change notification settings - Fork 260
/
Copy pathtest_boruta.py
67 lines (52 loc) · 1.84 KB
/
test_boruta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import pandas as pd
import pytest
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy
@pytest.mark.parametrize("tree_n,expected", [(10, 44), (100, 141)])
def test_get_tree_num(tree_n, expected):
    """Check ``BorutaPy._get_tree_num`` against known values.

    The selector wraps a random forest limited to ``max_depth=10``; for each
    parametrized ``tree_n`` the private helper must return ``expected``.
    """
    selector = BorutaPy(RandomForestClassifier(max_depth=10))
    computed = selector._get_tree_num(tree_n)
    assert computed == expected
@pytest.fixture(scope="module")
def Xy():
    """Build the synthetic classification data shared by this module's tests.

    Returns
    -------
    tuple
        ``(X, y)`` where ``y`` holds 1000 draws from ``binomial(1, 0.5)`` and
        ``X`` is a ``(1000, 10)`` array: columns 0-4 are derived from ``y``
        (relevant features), columns 5-9 are drawn independently of ``y``
        (irrelevant features).

    Notes
    -----
    NumPy's global random state is seeded with 42 before any draw. The draws
    below are made in a fixed order; reordering them would change the data.
    """
    n_samples = 1000
    np.random.seed(42)

    y = np.random.binomial(1, 0.5, n_samples)
    X = np.zeros((n_samples, 10))

    # Perturb y with two independent binomial(1, 0.1) draws, then force the
    # result back into {0, 1} (-1 becomes 0, 2 becomes 1).
    subtracted = np.random.binomial(1, 0.1, n_samples)
    added = np.random.binomial(1, 0.1, n_samples)
    z = np.clip(y - subtracted + added, 0, 1)

    # 5 relevant features
    X[:, 0] = z
    magnitude = y * np.abs(np.random.normal(0, 1, n_samples))
    X[:, 1] = magnitude + np.random.normal(0, 0.1, n_samples)
    X[:, 2] = y + np.random.normal(0, 1, n_samples)
    X[:, 3] = y**2 + np.random.normal(0, 1, n_samples)
    X[:, 4] = np.sqrt(y) + np.random.binomial(2, 0.1, n_samples)

    # 5 irrelevant features, filled in column order 5..9
    noise_specs = (
        (np.random.normal, (0, 1)),
        (np.random.poisson, (1,)),
        (np.random.binomial, (1, 0.3)),
        (np.random.normal, (0, 1)),
        (np.random.poisson, (1,)),
    )
    for column, (sampler, params) in enumerate(noise_specs, start=5):
        X[:, column] = sampler(*params, n_samples)

    return X, y
def test_if_boruta_extracts_relevant_features(Xy):
    """Fit BorutaPy on the array data and expect exactly columns 0-4 selected."""
    features, target = Xy
    selector = BorutaPy(RandomForestClassifier())
    selector.fit(features, target)

    expected = list(range(5))
    selected = list(np.where(selector.support_)[0])
    assert expected == selected
def test_if_it_works_with_dataframe_input(Xy):
    """Fit BorutaPy on pandas inputs and expect exactly columns 0-4 selected."""
    features, target = Xy
    frame = pd.DataFrame(features)
    labels = pd.Series(target)

    selector = BorutaPy(RandomForestClassifier())
    selector.fit(frame, labels)

    expected = list(range(5))
    selected = list(np.where(selector.support_)[0])
    assert expected == selected
def test_dataframe_is_returned(Xy):
    """After fitting on pandas inputs, ``transform(..., return_df=True)`` must
    give back a ``pd.DataFrame``."""
    features, target = Xy
    frame = pd.DataFrame(features)
    labels = pd.Series(target)

    selector = BorutaPy(RandomForestClassifier())
    selector.fit(frame, labels)

    transformed = selector.transform(frame, return_df=True)
    assert isinstance(transformed, pd.DataFrame)