-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexperiment1.py
94 lines (65 loc) · 3.11 KB
/
experiment1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from pathlib import Path
from frozendict import frozendict
import numpy as np
from scipy import stats
from uplift.ensemble import RandomForestClassifier
from uplift.metrics import qini_q
import dataset
from json_store import JsonReader
from json_store import JsonWriter
# Directory holding every artefact produced by this experiment.
experiment_folder = Path('experiment', 'experiment1')
# JSON-lines file to which one result row is appended per evaluated parameter set.
computed_jsons = experiment_folder.joinpath('computed.jsonl')
def generator(frozen_rv, seed):
    """Yield an endless, reproducible stream of samples from ``frozen_rv``.

    A ``RandomState`` seeded with ``seed`` produces one fresh integer seed per
    batch; each batch is 1000 samples drawn through ``frozen_rv.rvs`` with that
    seed and then handed out one value at a time.

    Args:
        frozen_rv: object exposing ``rvs(size, random_state=...)``.
        seed: seed for the ``RandomState`` that derives the per-batch seeds.

    Yields:
        Individual samples, batch after batch, without end.
    """
    seeder = np.random.RandomState(seed=seed)
    while True:
        batch_seed = seeder.randint(low=0, high=2**32 - 1)
        batch = frozen_rv.rvs(1000, random_state=batch_seed)
        for sample in batch:
            yield sample
def power(base, generator_):
    """Lazily yield ``base ** g`` for every ``g`` taken from ``generator_``.

    Args:
        base: value raised to each exponent.
        generator_: iterable of exponents; may be infinite.

    Yields:
        ``base`` raised to each successive exponent.
    """
    yield from map(lambda exponent: base ** exponent, generator_)
def integer(generator_):
    """Lazily yield ``int(g)`` for every ``g`` taken from ``generator_``.

    Args:
        generator_: iterable of values accepted by ``int``; may be infinite.

    Yields:
        Each successive value converted with ``int``.
    """
    yield from map(int, generator_)
# One endless sample stream per tuned hyperparameter. Each stream takes draws
# from stats.uniform(0, 3), raises 10 to that draw and truncates to int; the
# streams are seeded 1, 2, 3 in the order the names are listed.
hypergenerators = {
    name: integer(power(10, generator(stats.uniform(0, 3), seed=seed)))
    for seed, name in enumerate(
        ('max_depth', 'min_samples_split', 'min_samples_leaf'), start=1)
}
# 1000 hyperparameter combinations, each taking the next value of every stream.
hyperparameters = [
    dict(zip(hypergenerators, map(next, hypergenerators.values())))
    for _ in range(1000)
]
def parameters_to_compute():
    """Yield every parameter dictionary the experiment has to evaluate.

    The grid is: two dataset ids x nine shuffle seeds x two forest sizes x
    two split criteria x every entry of the module-level ``hyperparameters``.

    Yields:
        dict with keys ``dataset_id``, ``shuffle_seed``, ``n_estimators`` and
        ``criterion``, followed by the keys of one ``hyperparameters`` entry.
    """
    dataset_ids = ('dataset1', 'dataset2')
    forest_sizes = (100, 1000)
    criteria = ('uplift_gini', 'uplift_entropy')
    for dataset_id in dataset_ids:
        for shuffle_seed in range(9):
            for n_estimators in forest_sizes:
                for criterion in criteria:
                    # Part shared by all hyperparameter draws of this cell.
                    fixed = {'dataset_id': dataset_id,
                             'shuffle_seed': shuffle_seed,
                             'n_estimators': n_estimators,
                             'criterion': criterion}
                    for drawn in hyperparameters:
                        yield {**fixed, **drawn}
def compute_qini(parameters):
    """Train an uplift random forest for ``parameters`` and score it.

    The dataset named by ``parameters['dataset_id']`` is loaded, shuffled with
    ``parameters['shuffle_seed']`` and split with a 2/3 train proportion. A
    ``RandomForestClassifier`` configured from the remaining entries is fitted
    on the train part, and its predicted uplift on the test part is scored
    with ``qini_q``.

    Args:
        parameters: mapping with keys ``dataset_id``, ``shuffle_seed``,
            ``n_estimators``, ``criterion``, ``max_depth``,
            ``min_samples_split`` and ``min_samples_leaf``.

    Returns:
        The value returned by ``qini_q`` for the test part.
    """
    raw_X, raw_t, raw_y = dataset.load(parameters['dataset_id'])
    X, t, y = dataset.shuffled(raw_X, raw_t, raw_y, seed=parameters['shuffle_seed'])

    train_part, test_part = dataset.train_test_split(X, t, y, train_proportion=2/3)
    X_train, t_train, y_train = train_part
    X_test, t_test, y_test = test_part

    # Forward exactly the entries that are classifier constructor arguments.
    forest_options = {
        option: parameters[option]
        for option in ('n_estimators', 'criterion', 'max_depth',
                       'min_samples_split', 'min_samples_leaf')
    }
    forest = RandomForestClassifier(**forest_options)
    forest.fit(X_train, y_train, t_train)

    predicted_uplift = forest.predict_uplift(X_test)
    return qini_q(y_test, predicted_uplift, t_test)
if __name__ == '__main__':
    # Resumable driver: evaluate every parameter set that has no row yet in
    # the results file, appending one JSON row per evaluation.
    experiment_folder.mkdir(parents=True, exist_ok=True)

    # Parameter sets already stored, kept as frozendicts so they are hashable
    # and compare by content.
    computed = set()
    if computed_jsons.exists():
        with computed_jsons.open() as f:
            for computed_parameters in JsonReader(f):
                # Strip the result so only the inputs identify the row.
                del computed_parameters['qini']
                computed.add(frozendict(computed_parameters))

    for ps in parameters_to_compute():
        # The key must be a frozendict of the full mapping: a frozenset of the
        # dict would hold only its keys and never match a stored entry, which
        # made every run recompute everything.
        key = frozendict(ps)
        if key in computed:
            continue
        qini = compute_qini(ps)
        # Re-open in append mode for each row so finished work is on disk
        # before the next evaluation starts.
        with computed_jsons.open('a') as f:
            JsonWriter(f).writerow({'qini': qini,
                                    **ps})
        # Remember the key so a repeated parameter set is not evaluated twice
        # within the same run.
        computed.add(key)