forked from ZhiningLiu1998/self-paced-ensemble
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_example.py
123 lines (109 loc) · 4.45 KB
/
run_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
In this python script we provided an example of how to use our
implementation of ensemble methods to perform classification.
Usage:
```
python run_example.py --method=SPEnsemble --n_estimators=10 --runs=10
```
or with shortopts:
```
python run_example.py -m SPEnsemble -n 10 -r 10
```
run arguments:
-m / --method: string
| Specify which method is used to build the ensemble classifier.
| support: 'SPEnsemble', 'SMOTEBoost', 'SMOTEBagging', 'RUSBoost', 'UnderBagging', 'Cascade'
-n / --n_estimators: integer
| Specify how many base estimators are used in the ensemble.
-r / --runs: integer
| Specify the number of independent runs (to obtain mean and std)
"""
# Standard library
import argparse
import warnings
try:  # time.clock was removed in Python 3.8; keep the import for old interpreters
    from time import clock
except ImportError:
    clock = None
from time import perf_counter

warnings.filterwarnings("ignore")

# Third-party
import numpy as np
import pandas as pd
import sklearn
import sklearn.tree  # `import sklearn` alone does not import the tree submodule
from tqdm import trange

# Local
from self_paced_ensemble import SelfPacedEnsemble
from canonical_ensemble import *
from utils import *
# Names of the supported ensemble methods; also the valid --method choices.
METHODS = ['SPEnsemble', 'SMOTEBoost', 'SMOTEBagging', 'RUSBoost', 'UnderBagging', 'Cascade']
# Fixed seed so the train/test split is reproducible across runs.
RANDOM_STATE = 42
def parse():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with attributes ``method``, ``n_estimators``
        and ``runs``.
    """
    parser = argparse.ArgumentParser(
        description='Self-paced Ensemble',
        usage='run_example.py --method <method> --n_estimators <integer> --runs <integer>'
    )
    # The module docstring advertises shortopts (-m / -n / -r); define them
    # here so that usage actually works. Long options are unchanged, so
    # existing invocations keep working.
    parser.add_argument('-m', '--method', type=str, default='SPEnsemble',
        choices=METHODS+['all'], help='Name of ensemble method')
    parser.add_argument('-n', '--n_estimators', type=int, default=10,
        help='Number of base estimators')
    parser.add_argument('-r', '--runs', type=int, default=10,
        help='Number of independent runs')
    return parser.parse_args()
def init_model(method, base_estimator, n_estimators):
    """Return an ensemble model specified by ``method``.

    Args:
        method: one of 'SPEnsemble', 'SMOTEBoost', 'SMOTEBagging',
            'RUSBoost', 'UnderBagging', 'Cascade'.
        base_estimator: base classifier used by the ensemble.
        n_estimators: number of base estimators in the ensemble.

    Raises:
        ValueError: if ``method`` is not a supported method name.
        (The original raised an undefined name ``Error``, which itself
        crashed with NameError instead of reporting the bad argument.)
    """
    if method == 'SPEnsemble':
        model = SelfPacedEnsemble(base_estimator = base_estimator, n_estimators = n_estimators)
    elif method == 'SMOTEBoost':
        model = SMOTEBoost(base_estimator = base_estimator, n_estimators = n_estimators)
    elif method == 'SMOTEBagging':
        model = SMOTEBagging(base_estimator = base_estimator, n_estimators = n_estimators)
    elif method == 'RUSBoost':
        model = RUSBoost(base_estimator = base_estimator, n_estimators = n_estimators)
    elif method == 'UnderBagging':
        model = UnderBagging(base_estimator = base_estimator, n_estimators = n_estimators)
    elif method == 'Cascade':
        model = BalanceCascade(base_estimator = base_estimator, n_estimators = n_estimators)
    else:
        raise ValueError('No such method support: {}'.format(method))
    return model
def main():
    """Train the requested ensemble method(s) on the covtype dataset and
    print mean/std of timing and imbalanced-classification metrics."""
    # Parse arguments
    args = parse()
    method_used = args.method
    n_estimators = args.n_estimators
    runs = args.runs

    # Load train/test data (10% subset of covtype; fixed seed for
    # reproducibility — presumably stratified inside the helper, TODO confirm).
    X_train, X_test, y_train, y_test = load_covtype_dataset(
        subset=0.1, random_state=RANDOM_STATE)

    # Train & record each requested method.
    method_list = METHODS if method_used == 'all' else [method_used]
    for method in method_list:
        print('\nRunning method:\t\t{} - {} estimators in {} independent run(s) ...'.format(
            method, n_estimators, runs))
        scores, times = [], []
        try:
            with trange(runs) as t:
                for _ in t:
                    model = init_model(
                        method=method,
                        n_estimators=n_estimators,
                        base_estimator=sklearn.tree.DecisionTreeClassifier(),
                    )
                    # time.clock() was removed in Python 3.8; perf_counter
                    # is the supported monotonic wall-clock timer.
                    start_time = perf_counter()
                    model.fit(X_train, y_train)
                    times.append(perf_counter() - start_time)
                    # Probability of the minority (positive) class.
                    y_pred = model.predict_proba(X_test)[:, 1]
                    scores.append([
                        auc_prc(y_test, y_pred),
                        f1_optim(y_test, y_pred),
                        gm_optim(y_test, y_pred),
                        mcc_optim(y_test, y_pred)
                    ])
        except KeyboardInterrupt:
            t.close()
            raise
        # NOTE: the original had a bare `t.close` here (missing parentheses,
        # a no-op); the `with` block already closes the progress bar, so the
        # statement is dropped entirely.

        # Print results to console
        print('ave_run_time:\t\t{:.3f}s'.format(np.mean(times)))
        print('------------------------------')
        print('Metrics:')
        df_scores = pd.DataFrame(scores, columns=['AUCPRC', 'F1', 'G-mean', 'MCC'])
        for metric in df_scores.columns.tolist():
            print ('{}\tmean:{:.3f} std:{:.3f}'.format(metric, df_scores[metric].mean(), df_scores[metric].std()))
    return
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()