77import pandas as pd
88import numpy as np
99
10- from sklearn .metrics import mean_squared_error
11- from sklearn .metrics import r2_score
12- from scipy .stats .stats import pearsonr
13-
# Absolute directory of this script, resolved through any symlinks.
file_path = os.path.dirname(os.path.realpath(__file__))
# Put the ``common`` directory two levels above this script on the import
# path (presumably where the shared ``candle`` helpers live -- confirm).
lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
sys.path.append(lib_path2)

logger = logging.getLogger(__name__)
candle.set_parallelism_threads()
2418
# Benchmark-specific parameter definitions.  Each entry is a dict with a
# 'name', a 'type' converter, a 'help' string and, where given, a 'default'
# and an 'action'.
# NOTE(review): presumably consumed by the candle.Benchmark subclass defined
# later in this file to extend the argument parser -- confirm against usage.
additional_definitions = [
    {'name': 'latent_dim',
     'action': 'store',
     'type': int,
     'help': 'latent dimensions'},
    {'name': 'residual',
     'type': candle.str2bool,
     'default': False,
     'help': 'add skip connections to the layers'},
    {'name': 'reduce_lr',
     'type': candle.str2bool,
     'default': False,
     'help': 'reduce learning rate on plateau'},
    {'name': 'warmup_lr',
     'type': candle.str2bool,
     'default': False,
     'help': 'gradually increase learning rate on start'},
    {'name': 'base_lr',
     'type': float,
     'help': 'base learning rate'},
    {'name': 'epsilon_std',
     'type': float,
     'help': 'epsilon std for sampling latent noise'},
    {'name': 'use_cp',
     'type': candle.str2bool,
     'default': False,
     'help': 'checkpoint models with best val_loss'},
    {'name': 'use_tb',
     'type': candle.str2bool,
     'default': False,
     'help': 'use tensorboard'},
    {'name': 'tsne',
     'type': candle.str2bool,
     'default': False,
     'help': 'generate tsne plot of the latent representation'},
]
6555
6656required = [
8070 'batch_normalization' ,
8171 'epsilon_std' ,
8272 'timeout'
83- ]
73+ ]
74+
8475
8576class BenchmarkAttn (candle .Benchmark ):
8677
@@ -102,7 +93,7 @@ def extension_from_parameters(params, framework=''):
10293 ext = framework
10394 for i , n in enumerate (params ['dense' ]):
10495 if n :
105- ext += '.D{}={}' .format (i + 1 , n )
96+ ext += '.D{}={}' .format (i + 1 , n )
10697 ext += '.A={}' .format (params ['activation' ][0 ])
10798 ext += '.B={}' .format (params ['batch_size' ])
10899 ext += '.E={}' .format (params ['epochs' ])
@@ -124,15 +115,17 @@ def extension_from_parameters(params, framework=''):
124115 ext += '.Res'
125116
126117 return ext
118+
119+
127120def load_data (params , seed ):
128121
129122 # start change #
130123 if params ['train_data' ].endswith ('h5' ) or params ['train_data' ].endswith ('hdf5' ):
131- print ('processing h5 in file {}' .format (params ['train_data' ]))
124+ print ('processing h5 in file {}' .format (params ['train_data' ]))
132125
133126 url = params ['data_url' ]
134127 file_train = params ['train_data' ]
135- train_file = candle .get_file (file_train , url + file_train , cache_subdir = 'Pilot1' )
128+ train_file = candle .get_file (file_train , url + file_train , cache_subdir = 'Pilot1' )
136129
137130 df_x_train_0 = pd .read_hdf (train_file , 'x_train_0' ).astype (np .float32 )
138131 df_x_train_1 = pd .read_hdf (train_file , 'x_train_1' ).astype (np .float32 )
@@ -152,7 +145,7 @@ def load_data(params, seed):
152145 Y_train = pd .read_hdf (train_file , 'y_train' )
153146 Y_test = pd .read_hdf (train_file , 'y_test' )
154147 Y_val = pd .read_hdf (train_file , 'y_val' )
155-
148+
156149 # assumes AUC is in the third column at index 2
157150 # df_y = df['AUC'].astype('int')
158151 # df_x = df.iloc[:,3:].astype(np.float32)
@@ -161,18 +154,17 @@ def load_data(params, seed):
161154 # scaler = StandardScaler()
162155 # df_x = scaler.fit_transform(df_x)
163156 else :
164- print ('expecting in file file suffix h5' )
157+ print ('expecting in file file suffix h5' )
165158 sys .exit ()
166-
167-
159+
168160 print ('x_train shape:' , X_train .shape )
169161 print ('x_test shape:' , X_test .shape )
170-
162+
171163 return X_train , Y_train , X_val , Y_val , X_test , Y_test
172164
173165 # start change #
174166 if train_file .endswith ('h5' ) or train_file .endswith ('hdf5' ):
175- print ('processing h5 in file {}' .format (train_file ))
167+ print ('processing h5 in file {}' .format (train_file ))
176168
177169 df_x_train_0 = pd .read_hdf (train_file , 'x_train_0' ).astype (np .float32 )
178170 df_x_train_1 = pd .read_hdf (train_file , 'x_train_1' ).astype (np .float32 )
@@ -192,23 +184,19 @@ def load_data(params, seed):
192184 Y_train = pd .read_hdf (train_file , 'y_train' )
193185 Y_test = pd .read_hdf (train_file , 'y_test' )
194186 Y_val = pd .read_hdf (train_file , 'y_val' )
195-
187+
196188 # assumes AUC is in the third column at index 2
197189 # df_y = df['AUC'].astype('int')
198190 # df_x = df.iloc[:,3:].astype(np.float32)
199191
200192 # assumes dataframe has already been scaled
201193 # scaler = StandardScaler()
202194 # df_x = scaler.fit_transform(df_x)
203-
204195 else :
205- print ('expecting in file file suffix h5' )
196+ print ('expecting in file file suffix h5' )
206197 sys .exit ()
207-
208-
198+
209199 print ('x_train shape:' , X_train .shape )
210200 print ('x_test shape:' , X_test .shape )
211-
212- return X_train , Y_train , X_val , Y_val , X_test , Y_test
213-
214201
202+ return X_train , Y_train , X_val , Y_val , X_test , Y_test
0 commit comments