@@ -1,11 +1,11 @@
 from __future__ import print_function
 
+import logging
 import os
 import sys
-import logging
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 file_path = os.path.dirname(os.path.realpath(__file__))
 
@@ -15,64 +15,72 @@
 candle.set_parallelism_threads()
 
 additional_definitions = [
-    {'name': 'latent_dim',
-     'action': 'store',
-     'type': int,
-     'help': 'latent dimensions'},
-    {'name': 'residual',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'add skip connections to the layers'},
-    {'name': 'reduce_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'reduce learning rate on plateau'},
-    {'name': 'warmup_lr',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'gradually increase learning rate on start'},
-    {'name': 'base_lr',
-     'type': float,
-     'help': 'base learning rate'},
-    {'name': 'epsilon_std',
-     'type': float,
-     'help': 'epsilon std for sampling latent noise'},
-    {'name': 'use_cp',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'checkpoint models with best val_loss'},
-    {'name': 'use_tb',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'use tensorboard'},
-    {'name': 'tsne',
-     'type': candle.str2bool,
-     'default': False,
-     'help': 'generate tsne plot of the latent representation'}
+    {"name": "latent_dim", "action": "store", "type": int, "help": "latent dimensions"},
+    {
+        "name": "residual",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "add skip connections to the layers",
+    },
+    {
+        "name": "reduce_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "reduce learning rate on plateau",
+    },
+    {
+        "name": "warmup_lr",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "gradually increase learning rate on start",
+    },
+    {"name": "base_lr", "type": float, "help": "base learning rate"},
+    {
+        "name": "epsilon_std",
+        "type": float,
+        "help": "epsilon std for sampling latent noise",
+    },
+    {
+        "name": "use_cp",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "checkpoint models with best val_loss",
+    },
+    {
+        "name": "use_tb",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "use tensorboard",
+    },
+    {
+        "name": "tsne",
+        "type": candle.str2bool,
+        "default": False,
+        "help": "generate tsne plot of the latent representation",
+    },
 ]
 
 required = [
-    'activation',
-    'batch_size',
-    'dense',
-    'dropout',
-    'epochs',
-    'initialization',
-    'learning_rate',
-    'loss',
-    'optimizer',
-    'rng_seed',
-    'scaling',
-    'val_split',
-    'latent_dim',
-    'batch_normalization',
-    'epsilon_std',
-    'timeout'
+    "activation",
+    "batch_size",
+    "dense",
+    "dropout",
+    "epochs",
+    "initialization",
+    "learning_rate",
+    "loss",
+    "optimizer",
+    "rng_seed",
+    "scaling",
+    "val_split",
+    "latent_dim",
+    "batch_normalization",
+    "epsilon_std",
+    "timeout",
 ]
 
 
 class BenchmarkAttn(candle.Benchmark):
-
     def set_locals(self):
         """Functionality to set variables specific for the benchmark
         - required: set of required parameters for the benchmark.
@@ -86,63 +94,65 @@ def set_locals(self):
         self.additional_definitions = additional_definitions
 
 
-def extension_from_parameters(params, framework=''):
+def extension_from_parameters(params, framework=""):
     """Construct string for saving model with annotation of parameters"""
     ext = framework
-    for i, n in enumerate(params['dense']):
+    for i, n in enumerate(params["dense"]):
         if n:
-            ext += '.D{}={}'.format(i + 1, n)
-    ext += '.A={}'.format(params['activation'][0])
-    ext += '.B={}'.format(params['batch_size'])
-    ext += '.E={}'.format(params['epochs'])
-    ext += '.L={}'.format(params['latent_dim'])
-    ext += '.LR={}'.format(params['learning_rate'])
-    ext += '.S={}'.format(params['scaling'])
-
-    if params['epsilon_std'] != 1.0:
-        ext += '.EPS={}'.format(params['epsilon_std'])
-    if params['dropout']:
-        ext += '.DR={}'.format(params['dropout'])
-    if params['batch_normalization']:
-        ext += '.BN'
-    if params['warmup_lr']:
-        ext += '.WU_LR'
-    if params['reduce_lr']:
-        ext += '.Re_LR'
-    if params['residual']:
-        ext += '.Res'
+            ext += ".D{}={}".format(i + 1, n)
+    ext += ".A={}".format(params["activation"][0])
+    ext += ".B={}".format(params["batch_size"])
+    ext += ".E={}".format(params["epochs"])
+    ext += ".L={}".format(params["latent_dim"])
+    ext += ".LR={}".format(params["learning_rate"])
+    ext += ".S={}".format(params["scaling"])
+
+    if params["epsilon_std"] != 1.0:
+        ext += ".EPS={}".format(params["epsilon_std"])
+    if params["dropout"]:
+        ext += ".DR={}".format(params["dropout"])
+    if params["batch_normalization"]:
+        ext += ".BN"
+    if params["warmup_lr"]:
+        ext += ".WU_LR"
+    if params["reduce_lr"]:
+        ext += ".Re_LR"
+    if params["residual"]:
+        ext += ".Res"
 
     return ext
 
 
 def load_data(params, seed):
 
     # start change #
-    if params['train_data'].endswith('h5') or params['train_data'].endswith('hdf5'):
-        print('processing h5 in file {}'.format(params['train_data']))
+    if params["train_data"].endswith("h5") or params["train_data"].endswith("hdf5"):
+        print("processing h5 in file {}".format(params["train_data"]))
 
-        url = params['data_url']
-        file_train = params['train_data']
-        train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
+        url = params["data_url"]
+        file_train = params["train_data"]
+        train_file = candle.get_file(
+            file_train, url + file_train, cache_subdir="Pilot1"
+        )
 
-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1
 
-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1
 
-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1
 
-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")
 
         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -152,36 +162,36 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()
 
-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)
 
     return X_train, Y_train, X_val, Y_val, X_test, Y_test
 
     # start change #
-    if train_file.endswith('h5') or train_file.endswith('hdf5'):
-        print('processing h5 in file {}'.format(train_file))
+    if train_file.endswith("h5") or train_file.endswith("hdf5"):
+        print("processing h5 in file {}".format(train_file))
 
-        df_x_train_0 = pd.read_hdf(train_file, 'x_train_0').astype(np.float32)
-        df_x_train_1 = pd.read_hdf(train_file, 'x_train_1').astype(np.float32)
+        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
+        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
         X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
         del df_x_train_0, df_x_train_1
 
-        df_x_test_0 = pd.read_hdf(train_file, 'x_test_0').astype(np.float32)
-        df_x_test_1 = pd.read_hdf(train_file, 'x_test_1').astype(np.float32)
+        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
+        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
         X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
         del df_x_test_0, df_x_test_1
 
-        df_x_val_0 = pd.read_hdf(train_file, 'x_val_0').astype(np.float32)
-        df_x_val_1 = pd.read_hdf(train_file, 'x_val_1').astype(np.float32)
+        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
+        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
         X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
         del df_x_val_0, df_x_val_1
 
-        Y_train = pd.read_hdf(train_file, 'y_train')
-        Y_test = pd.read_hdf(train_file, 'y_test')
-        Y_val = pd.read_hdf(train_file, 'y_val')
+        Y_train = pd.read_hdf(train_file, "y_train")
+        Y_test = pd.read_hdf(train_file, "y_test")
+        Y_val = pd.read_hdf(train_file, "y_val")
 
         # assumes AUC is in the third column at index 2
         # df_y = df['AUC'].astype('int')
@@ -191,10 +201,10 @@ def load_data(params, seed):
         # scaler = StandardScaler()
         # df_x = scaler.fit_transform(df_x)
     else:
-        print('expecting in file file suffix h5')
+        print("expecting in file file suffix h5")
         sys.exit()
 
-    print('x_train shape:', X_train.shape)
-    print('x_test shape:', X_test.shape)
+    print("x_train shape:", X_train.shape)
+    print("x_test shape:", X_test.shape)
 
     return X_train, Y_train, X_val, Y_val, X_test, Y_test
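
Note: this commit is a pure reformat (quote style, line wrapping, trailing commas), so behavior is unchanged; `extension_from_parameters`, for example, still composes the same run-suffix string before and after. A minimal usage sketch follows; the parameter names come from `required` above, but the values are illustrative, not taken from this commit.

# Sketch: feeding extension_from_parameters a hand-built params dict.
# All values below are made up for illustration.
params = {
    "dense": [2000, 600],
    "activation": ["relu"],
    "batch_size": 32,
    "epochs": 100,
    "latent_dim": 2,
    "learning_rate": 0.001,
    "scaling": "minmax",
    "epsilon_std": 1.0,  # equal to 1.0, so no ".EPS=" tag is appended
    "dropout": 0.2,
    "batch_normalization": True,
    "warmup_lr": False,
    "reduce_lr": True,
    "residual": False,
}
print(extension_from_parameters(params))
# -> .D1=2000.D2=600.A=relu.B=32.E=100.L=2.LR=0.001.S=minmax.DR=0.2.BN.Re_LR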
0 commit comments
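Several `additional_definitions` entries pass `candle.str2bool` as their argparse `type`. For readers without the CANDLE library at hand, a str2bool-style converter typically looks like the sketch below; this is an assumption-labeled illustration, not CANDLE's actual implementation.

import argparse

def str2bool_sketch(v):
    # Map common truthy/falsy strings to bool for use as an argparse type=.
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if v.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")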