@@ -7,13 +7,7 @@
 import pandas as pd
 import numpy as np
 
-from sklearn.metrics import mean_squared_error
-from sklearn.metrics import r2_score
-from scipy.stats.stats import pearsonr
-
 file_path = os.path.dirname(os.path.realpath(__file__))
-#lib_path = os.path.abspath(os.path.join(file_path, '..'))
-#sys.path.append(lib_path)
 lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
 sys.path.append(lib_path2)
 
@@ -22,45 +16,41 @@
 logger = logging.getLogger(__name__)
 candle.set_parallelism_threads()
 
-additional_definitions = [
-    {'name':'latent_dim',
-     'action':'store',
-     'type': int,
-     'help':'latent dimensions'},
-    {'name':'residual',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'add skip connections to the layers'},
-    {'name':'reduce_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'reduce learning rate on plateau'},
-    {'name':'warmup_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'gradually increase learning rate on start'},
-    {'name':'base_lr',
-     'type': float,
-     'help':'base learning rate'},
-    {'name':'epsilon_std',
-     'type': float,
-     'help':'epsilon std for sampling latent noise'},
-    {'name':'use_cp',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'checkpoint models with best val_loss'},
-    #{'name':'shuffle',
-    #'type': candle.str2bool,
-    #'default': False,
-    #'help':'shuffle data'},
-    {'name':'use_tb',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'use tensorboard'},
-    {'name':'tsne',
-     'type': candle.str2bool,
-     'default': False,
-     'help':'generate tsne plot of the latent representation'}
+additional_definitions = [
+    {'name': 'latent_dim',
+     'action': 'store',
+     'type': int,
+     'help': 'latent dimensions'},
+    {'name': 'residual',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'add skip connections to the layers'},
+    {'name': 'reduce_lr',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'reduce learning rate on plateau'},
+    {'name': 'warmup_lr',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'gradually increase learning rate on start'},
+    {'name': 'base_lr',
+     'type': float,
+     'help': 'base learning rate'},
+    {'name': 'epsilon_std',
+     'type': float,
+     'help': 'epsilon std for sampling latent noise'},
+    {'name': 'use_cp',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'checkpoint models with best val_loss'},
+    {'name': 'use_tb',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'use tensorboard'},
+    {'name': 'tsne',
+     'type': candle.str2bool,
+     'default': False,
+     'help': 'generate tsne plot of the latent representation'}
 ]
 
 required = [
@@ -80,7 +70,8 @@
     'batch_normalization',
     'epsilon_std',
     'timeout'
-]
+]
+
 
 class BenchmarkAttn(candle.Benchmark):
 
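The `additional_definitions` list above follows the CANDLE convention of describing each benchmark-specific hyperparameter as a dictionary that `candle.Benchmark` merges into an argparse-based parser. As a rough sketch of what that mapping amounts to (plain `argparse` only; `str2bool` is reimplemented here as an assumption, not the actual candle internals):

```python
import argparse


def str2bool(v):
    # Assumed stand-in for candle.str2bool: accept common truthy strings.
    return str(v).lower() in ('yes', 'true', 't', '1')


# Two of the definitions above, in the same dictionary format.
defs = [
    {'name': 'latent_dim', 'action': 'store', 'type': int,
     'help': 'latent dimensions'},
    {'name': 'residual', 'type': str2bool, 'default': False,
     'help': 'add skip connections to the layers'},
]

parser = argparse.ArgumentParser()
for d in defs:
    # Each dict becomes one --<name> flag; keys map onto argparse keywords.
    parser.add_argument('--' + d['name'],
                        action=d.get('action', 'store'),
                        type=d['type'],
                        default=d.get('default'),
                        help=d['help'])

args = parser.parse_args(['--latent_dim', '16', '--residual', 'true'])
print(args.latent_dim, args.residual)  # 16 True
```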
@@ -102,7 +93,7 @@ def extension_from_parameters(params, framework=''):
     ext = framework
     for i, n in enumerate(params['dense']):
         if n:
-            ext += '.D{}={}'.format(i + 1, n)
+            ext += '.D{}={}'.format(i + 1, n)
     ext += '.A={}'.format(params['activation'][0])
     ext += '.B={}'.format(params['batch_size'])
     ext += '.E={}'.format(params['epochs'])
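For context, `extension_from_parameters` concatenates the key hyperparameters into a suffix used to tag a run's output files. A small illustration with invented values shows the kind of string it builds:

```python
# Hypothetical parameter set; the keys mirror those read in the function above.
params = {'dense': [1000, 500, 0], 'activation': ['relu'],
          'batch_size': 32, 'epochs': 100}

ext = ''
for i, n in enumerate(params['dense']):
    if n:  # layers of width 0 are skipped
        ext += '.D{}={}'.format(i + 1, n)
ext += '.A={}'.format(params['activation'][0])
ext += '.B={}'.format(params['batch_size'])
ext += '.E={}'.format(params['epochs'])
print(ext)  # .D1=1000.D2=500.A=relu.B=32.E=100
```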
@@ -124,15 +115,17 @@ def extension_from_parameters(params, framework=''):
         ext += '.Res'
 
     return ext
+
+
 def load_data(params, seed):
 
     # start change #
     if params['train_data'].endswith('h5') or params['train_data'].endswith('hdf5'):
-        print('processing h5 in file {}'.format(params['train_data']))
+        print('processing h5 in file {}'.format(params['train_data']))
 
         url = params['data_url']
         file_train = params['train_data']
-        train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
+        train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
 
         df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
         df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
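`load_data` pulls every split out of one HDF5 file by key with `pandas.read_hdf`. A self-contained sketch of the same read pattern against a throwaway file (file name, shapes, and data are invented for the demo; needs the `tables` package that pandas uses for HDF5):

```python
import numpy as np
import pandas as pd

# Write a tiny HDF5 file holding two named keys, mimicking the layout above.
pd.DataFrame(np.random.rand(10, 4).astype(np.float32)).to_hdf(
    'demo.h5', key='x_train_0', mode='w')
pd.DataFrame(np.random.rand(10, 1)).to_hdf('demo.h5', key='y_train')

# Read each split back by key, as load_data does for x_train_0, y_train, etc.
x_train_0 = pd.read_hdf('demo.h5', 'x_train_0').astype(np.float32)
y_train = pd.read_hdf('demo.h5', 'y_train')
print(x_train_0.shape, y_train.shape)  # (10, 4) (10, 1)
```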
@@ -152,7 +145,7 @@ def load_data(params, seed):
         Y_train = pd.read_hdf(train_file, 'y_train')
         Y_test = pd.read_hdf(train_file, 'y_test')
         Y_val = pd.read_hdf(train_file, 'y_val')
-
+
         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
         # df_x = df.iloc[:,3:].astype(np.float32)
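The commented-out lines record an assumption rather than dead logic: the dataframes in the HDF5 file were standardized before being written, so no scaler runs here. If the data were not pre-scaled, the step would look roughly like this (a sketch only; fit on the training split and reuse its statistics for val/test to avoid leakage):

```python
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Fit on training features only, then apply the same transform everywhere.
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
```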
@@ -161,18 +154,17 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print('expecting in file file suffix h5')
         sys.exit()
-
-
+
     print('x_train shape:', X_train.shape)
     print('x_test shape:', X_test.shape)
-
+
     return X_train, Y_train, X_val, Y_val, X_test, Y_test
 
     # start change #
     if train_file.endswith('h5') or train_file.endswith('hdf5'):
-        print('processing h5 in file {}'.format(train_file))
+        print('processing h5 in file {}'.format(train_file))
 
         df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
         df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
@@ -192,23 +184,19 @@ def load_data(params, seed):
         Y_train = pd.read_hdf(train_file, 'y_train')
         Y_test = pd.read_hdf(train_file, 'y_test')
         Y_val = pd.read_hdf(train_file, 'y_val')
-
+
         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
         # df_x = df.iloc[:,3:].astype(np.float32)
 
         # assumes dataframe has already been scaled
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
-
     else:
-        print('expecting in file file suffix h5')
+        print('expecting in file file suffix h5')
         sys.exit()
-
-
+
     print('x_train shape:', X_train.shape)
     print('x_test shape:', X_test.shape)
-
-    return X_train, Y_train, X_val, Y_val, X_test, Y_test
-
 
+    return X_train, Y_train, X_val, Y_val, X_test, Y_test
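With the patch applied, `load_data` ends in a single `return` of the six splits. A hedged sketch of a call site (the file name and URL below are placeholders, not values from this repository):

```python
# 'train_data' must name an .h5/.hdf5 file; 'data_url' is where it is fetched from.
params = {'train_data': 'training_set.h5',
          'data_url': 'https://example.org/data/'}

X_train, Y_train, X_val, Y_val, X_test, Y_test = load_data(params, seed=2017)
print(X_train.shape, Y_train.shape)
```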