-
Notifications
You must be signed in to change notification settings - Fork 71
/
Copy pathAuto_Run.py
148 lines (126 loc) · 4.95 KB
/
Auto_Run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import pandas as pd
import numpy as np
import yaml
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from FS.pso import jfs as jfs_pso
from FS.ga import jfs as jfs_ga
from FS.de import jfs as jfs_de
from FS.ba import jfs as jfs_ba
from FS.cs import jfs as jfs_cs
from FS.fa import jfs as jfs_fa
from FS.fpa import jfs as jfs_fpa
from FS.sca import jfs as jfs_sca
from FS.woa import jfs as jfs_woa
class DataPipeline:
    """
    Data Model with Pipeline to Process and Feature Select.

    The pipeline loads a CSV file, splits it into train/test folds, reads a
    YAML list of meta-heuristic configurations and, for each configuration,
    runs the matching feature-selection routine, evaluates a KNN classifier
    on the selected features and plots the convergence curve.
    """

    def __init__(self, _ori_data_path: str, _train_test_ratio: float, _str_read_meta_para: str) -> None:
        """
        para1:_ori_data_path:Read_RawData_Path(CSV_Format) => Convert to DataFrame
        para2:_train_test_ratio:Split DataSet Ratio
        para3:_str_read_meta_para:Path of the YAML file holding the algorithm parameters
        """
        self._ori_data_path = _ori_data_path
        self._train_test_ratio = _train_test_ratio
        self._str_read_meta_para = _str_read_meta_para
        self.data = None
        self.feat = None
        self.label = None
        self.fold = None  # Contain Splited Train/Test Data
        self.meta_para = None
        self.sf = None  # Select_Feature
        self.fmdl = None  # Feature Model

    def _load_ori_file(self, _ori_path) -> None:
        '''
        Load Raw Data File By CSV Path

        The CSV is read with ``pd.read_csv`` and converted to a plain array.
        Every column except the last becomes ``self.feat``; the last column
        becomes ``self.label``.
        '''
        self.data = pd.read_csv(_ori_path).values
        # All Feature
        self.feat = np.asarray(self.data[:, 0:-1])
        # Predict Y Value
        self.label = np.asarray(self.data[:, -1])

    def _data_split(self, ratio: float) -> None:
        '''
        Setting Train/Test Ratio

        ``ratio`` is passed as ``test_size``; the split is stratified on the
        labels. The result is stored in ``self.fold`` under the keys
        ``'xt'``/``'yt'`` (train) and ``'xv'``/``'yv'`` (test).
        '''
        xtrain, xtest, ytrain, ytest = train_test_split(
            self.feat, self.label, test_size=ratio, stratify=self.label
        )
        self.fold = {'xt': xtrain, 'yt': ytrain, 'xv': xtest, 'yv': ytest}

    def _read_algo_parameter(self, _str_read_meta_para: str) -> None:
        '''
        Read Meta Para File

        Opens the YAML file at ``_str_read_meta_para`` and stores the value of
        its top-level ``'meta_para'`` key in ``self.meta_para``.
        '''
        # Open the path that was actually passed in; a hard-coded file name
        # here would silently ignore the caller's configuration.
        with open(_str_read_meta_para, 'r') as meta_file:
            # NOTE(review): full_load is meant for trusted files only; confirm
            # the parameter file is never user-supplied.
            self.meta_para = yaml.full_load(meta_file)['meta_para']

    def _build_fmdl(self, algo_name):
        '''
        Construct Each Meta Model

        Returns the ``jfs`` callable registered under ``algo_name``.
        Raises ``KeyError`` when the name is not one of the known algorithms.
        '''
        interface_fmdl = {
            'pso': jfs_pso,
            'ga': jfs_ga,
            'de': jfs_de,
            'ba': jfs_ba,
            'cs': jfs_cs,
            'fa': jfs_fa,
            'fpa': jfs_fpa,
            'sca': jfs_sca,
            'woa': jfs_woa,
        }
        try:
            return interface_fmdl[algo_name]
        except KeyError:
            # Same exception type as a plain lookup, but with a usable message.
            raise KeyError(
                "Unknown algorithm {!r}; expected one of {}".format(
                    algo_name, sorted(interface_fmdl)
                )
            ) from None

    def _process_feature_select(self, strMetaName: str, listMetaOpts: dict) -> None:
        """
        Prepare MetaModel and X Feature Data

        Runs the routine selected by ``strMetaName`` on the full feature and
        label arrays, then stores its result in ``self.fmdl`` and the result's
        ``'sf'`` entry in ``self.sf``.
        """
        # perform feature selection
        ## Append Fold Data Into OPTS
        # Work on a shallow copy so the caller's options (and self.meta_para)
        # are not polluted with the fold arrays.
        opts_with_fold = dict(listMetaOpts, fold=self.fold)
        self.fmdl = self._build_fmdl(strMetaName)(self.feat, self.label, opts_with_fold)
        self.sf = self.fmdl['sf']

    def _data_feature_select(self, strMetaName: str, listMetaOpts: dict) -> None:
        """
        model with selected features

        Fits a KNN classifier (``n_neighbors = listMetaOpts['k']``) on the
        selected columns of the training fold, then prints the accuracy on the
        test fold and the ``'nf'`` entry of the feature model.
        ``strMetaName`` is accepted for signature symmetry and is not used.
        """
        num_train = np.size(self.fold['xt'], 0)
        num_valid = np.size(self.fold['xv'], 0)
        x_train = self.fold['xt'][:, self.sf]
        y_train = self.fold['yt'].reshape(num_train)  # flatten labels to 1-D
        x_valid = self.fold['xv'][:, self.sf]
        y_valid = self.fold['yv'].reshape(num_valid)  # flatten labels to 1-D

        mdl = KNeighborsClassifier(n_neighbors=listMetaOpts['k'])
        mdl.fit(x_train, y_train)

        # accuracy
        y_pred = mdl.predict(x_valid)
        Acc = np.sum(y_valid == y_pred) / num_valid
        print("Accuracy:", 100 * Acc)

        # number of selected features
        num_feat = self.fmdl['nf']
        print("Feature Size:", num_feat)

    def _plot_converege(self, strMetaName: str, listMetaOpts: dict) -> None:
        '''
        plot convergence

        Plots the ``'c'`` entry of the feature model against iteration numbers
        ``1..listMetaOpts['T']``, titled with ``strMetaName``.
        '''
        curve = self.fmdl['c']
        # The curve is indexed on axis 1, i.e. it is expected to be 2-D;
        # flatten it to a vector for plotting.
        curve = curve.reshape(np.size(curve, 1))
        x = np.arange(0, listMetaOpts['T'], 1.0) + 1.0

        fig, ax = plt.subplots()
        ax.plot(x, curve, 'o-')
        ax.set_xlabel('Number of Iterations')
        ax.set_ylabel('Fitness')
        ax.set_title(strMetaName)
        ax.grid()
        plt.show()

    def proceed(self) -> None:
        '''
        Execute Function

        Loads the data, splits it, reads the algorithm parameters, then for
        every entry in ``self.meta_para`` prints its name and options, runs
        feature selection, evaluates it and plots the convergence curve.
        '''
        self._load_ori_file(self._ori_data_path)
        self._data_split(self._train_test_ratio)
        self._read_algo_parameter(self._str_read_meta_para)
        for meta in self.meta_para:
            print(meta['name'])
            print(meta['opts'])
            self._process_feature_select(meta['name'], meta['opts'])
            self._data_feature_select(meta['name'], meta['opts'])
            self._plot_converege(meta['name'], meta['opts'])
def main():
    """Build a DataPipeline for ./ionosphere.csv and run it end to end."""
    pipeline = DataPipeline(
        './ionosphere.csv',   # raw CSV data
        0.3,                  # train/test split ratio
        './algo_para.yaml',   # algorithm parameter file
    )
    pipeline.proceed()
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()