
Commit 8b0b27c

Merge remote-tracking branch 'origin/release_05'
2 parents: a48c85a + ff812ec

129 files changed: +31745 -626 lines


Pilot1/Attn/attn.py (-2)

@@ -8,8 +8,6 @@
 import numpy as np

 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
-sys.path.append(lib_path2)

 import candle
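Note: the deleted lines here (and the matching deletion in Pilot1/Combo/combo.py below) were a sys.path bootstrap that made candle importable from the repo's common/ directory. After this commit the scripts assume candle is installed in the environment; a minimal sketch of that assumption (the install source is not specified by the commit):

    # A minimal sketch, assuming the candle helper library is installed as a
    # package; this commit removes the sys.path fallback to ../../common.
    try:
        import candle
    except ImportError as err:
        raise ImportError(
            "The 'candle' package must be installed; this script no longer "
            "adds ../../common to sys.path at import time."
        ) from err
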
Pilot1/Attn/attn_abstention_keras2.py (+12 -38)

@@ -8,7 +8,7 @@
 import tensorflow as tf

 from tensorflow.keras import backend as K
-from tensorflow.keras.models import model_from_json, model_from_yaml
+from tensorflow.keras.models import model_from_json
 from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard

 from sklearn.utils.class_weight import compute_class_weight

@@ -214,7 +214,7 @@ def run(params):

     # Try class weight and abstention classifier
     y_integers = np.argmax(Y_train, axis=1)
-    class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
+    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_integers), y=y_integers)
     d_class_weights = dict(enumerate(class_weights))

     print('X_train shape:', X_train.shape)

@@ -444,12 +444,6 @@ def save_and_test_saved_model(params, model, root_fname, nb_classes, alpha, mask
     with open(params['save_path'] + root_fname + '.model.json', "w") as json_file:
         json_file.write(model_json)

-    # serialize model to YAML
-    model_yaml = model.to_yaml()
-    with open(params['save_path'] + root_fname + '.model.yaml', "w") as yaml_file:
-
-        yaml_file.write(model_yaml)
-
     # serialize weights to HDF5
     model.save_weights(params['save_path'] + root_fname + '.model.h5')
     print("Saved model to disk")

@@ -460,18 +454,8 @@ def save_and_test_saved_model(params, model, root_fname, nb_classes, alpha, mask
     json_file.close()
     loaded_model_json = model_from_json(loaded_model_json)

-    # load yaml and create model
-    yaml_file = open(params['save_path'] + root_fname + '.model.yaml', 'r')
-    loaded_model_yaml = yaml_file.read()
-    yaml_file.close()
-    loaded_model_yaml = model_from_yaml(loaded_model_yaml)
-    # yaml.load(input, Loader=yaml.FullLoader)
-
     # load weights into new model
     loaded_model_json.load_weights(params['save_path'] + root_fname + '.model.h5')
-    # input = params['save_path'] + root_fname + '.model.h5'
-    # loaded_model_json.load(input, Loader=yaml.FullLoader)
-    # print("Loaded json model from disk")

     # evaluate json loaded model on test data
     loaded_model_json.compile(loss=candle.abstention_loss(alpha, mask), optimizer='SGD', metrics=[candle.abstention_acc_metric(nb_classes)])

@@ -480,27 +464,17 @@ def save_and_test_saved_model(params, model, root_fname, nb_classes, alpha, mask
     print('json Validation abstention accuracy:', score_json[1])
     print("json %s: %.2f%%" % (loaded_model_json.metrics_names[1], score_json[1] * 100))

-    # load weights into new model
-    loaded_model_yaml.load_weights(params['save_path'] + root_fname + '.model.h5')
-    print("Loaded yaml model from disk")
-    # evaluate yaml loaded model on test data
-    loaded_model_yaml.compile(loss=candle.abstention_loss(alpha, mask), optimizer='SGD', metrics=[candle.abstention_acc_metric(nb_classes)])
-    score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)
-    print('yaml Validation abstention loss:', score_yaml[0])
-    print('yaml Validation abstention accuracy:', score_yaml[1])
-    print("yaml %s: %.2f%%" % (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))
-
     # predict using loaded yaml model on test and training data
-    predict_yaml_train = loaded_model_yaml.predict(X_train)
-    predict_yaml_test = loaded_model_yaml.predict(X_test)
-    print('Yaml_train_shape:', predict_yaml_train.shape)
-    print('Yaml_test_shape:', predict_yaml_test.shape)
-    predict_yaml_train_classes = np.argmax(predict_yaml_train, axis=1)
-    predict_yaml_test_classes = np.argmax(predict_yaml_test, axis=1)
-    np.savetxt(params['save_path'] + root_fname + '_predict_yaml_train.csv', predict_yaml_train, delimiter=",", fmt="%.3f")
-    np.savetxt(params['save_path'] + root_fname + '_predict_yaml_test.csv', predict_yaml_test, delimiter=",", fmt="%.3f")
-    np.savetxt(params['save_path'] + root_fname + '_predict_yaml_train_classes.csv', predict_yaml_train_classes, delimiter=",", fmt="%d")
-    np.savetxt(params['save_path'] + root_fname + '_predict_yaml_test_classes.csv', predict_yaml_test_classes, delimiter=",", fmt="%d")
+    predict_train = loaded_model_json.predict(X_train)
+    predict_test = loaded_model_json.predict(X_test)
+    print('train_shape:', predict_train.shape)
+    print('test_shape:', predict_test.shape)
+    predict_train_classes = np.argmax(predict_train, axis=1)
+    predict_test_classes = np.argmax(predict_test, axis=1)
+    np.savetxt(params['save_path'] + root_fname + '_predict_train.csv', predict_train, delimiter=",", fmt="%.3f")
+    np.savetxt(params['save_path'] + root_fname + '_predict_test.csv', predict_test, delimiter=",", fmt="%.3f")
+    np.savetxt(params['save_path'] + root_fname + '_predict_train_classes.csv', predict_train_classes, delimiter=",", fmt="%d")
+    np.savetxt(params['save_path'] + root_fname + '_predict_test_classes.csv', predict_test_classes, delimiter=",", fmt="%d")


 def main():
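
Note: Model.to_yaml() and model_from_yaml() were removed from Keras (recent TensorFlow 2.x releases reject them, citing the security risk of deserializing arbitrary YAML), so both Attn scripts now round-trip the architecture through JSON only. A minimal sketch of the surviving save/load pattern, using a toy stand-in model and hypothetical file names:

    # A minimal sketch of the JSON-only round trip, assuming TensorFlow 2.x;
    # 'model.json' / 'model.h5' are illustrative names, not the repo's paths.
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.models import model_from_json

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(4, activation='relu', input_shape=(8,)),
        tf.keras.layers.Dense(2, activation='softmax'),
    ])

    with open('model.json', 'w') as f:      # architecture -> JSON
        f.write(model.to_json())
    model.save_weights('model.h5')          # weights -> HDF5

    with open('model.json') as f:           # rebuild, then reload weights
        loaded = model_from_json(f.read())
    loaded.load_weights('model.h5')
    loaded.compile(loss='categorical_crossentropy', optimizer='sgd',
                   metrics=['accuracy'])    # compile before evaluate()
    print(loaded.predict(np.zeros((1, 8))).shape)  # (1, 2)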

Pilot1/Attn/attn_baseline_keras2.py (+15 -37)

@@ -10,7 +10,7 @@
 from tensorflow.keras import backend as K

 from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
-from tensorflow.keras.models import Model, model_from_json, model_from_yaml
+from tensorflow.keras.models import Model, model_from_json
 from tensorflow.keras.utils import to_categorical

 from tensorflow.keras.callbacks import Callback, ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard

@@ -197,7 +197,7 @@ def run(params):
     Y_val = to_categorical(Y_val, nb_classes)

     y_integers = np.argmax(Y_train, axis=1)
-    class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
+    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_integers), y=y_integers)
     d_class_weights = dict(enumerate(class_weights))

     print('X_train shape:', X_train.shape)

@@ -363,11 +363,6 @@ def save_and_test_saved_model(params, model, root_fname, X_train, X_test, Y_test
     with open(params['save_path'] + root_fname + ".model.json", "w") as json_file:
         json_file.write(model_json)

-    # serialize model to YAML
-    model_yaml = model.to_yaml()
-    with open(params['save_path'] + root_fname + ".model.yaml", "w") as yaml_file:
-        yaml_file.write(model_yaml)
-
     # serialize weights to HDF5
     model.save_weights(params['save_path'] + root_fname + ".model.h5")
     print("Saved model to disk")

@@ -378,12 +373,6 @@ def save_and_test_saved_model(params, model, root_fname, X_train, X_test, Y_test
     json_file.close()
     loaded_model_json = model_from_json(loaded_model_json)

-    # load yaml and create model
-    yaml_file = open(params['save_path'] + root_fname + '.model.yaml', 'r')
-    loaded_model_yaml = yaml_file.read()
-    yaml_file.close()
-    loaded_model_yaml = model_from_yaml(loaded_model_yaml)
-
     # load weights into new model
     loaded_model_json.load_weights(params['save_path'] + root_fname + ".model.h5")
     print("Loaded json model from disk")

@@ -397,30 +386,19 @@ def save_and_test_saved_model(params, model, root_fname, X_train, X_test, Y_test

     print("json %s: %.2f%%" % (loaded_model_json.metrics_names[1], score_json[1] * 100))

-    # load weights into new model
-    loaded_model_yaml.load_weights(params['save_path'] + root_fname + ".model.h5")
-    print("Loaded yaml model from disk")
-
-    # evaluate loaded model on test data
-    loaded_model_yaml.compile(loss='binary_crossentropy', optimizer=params['optimizer'], metrics=['accuracy'])
-    score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)
-    print('yaml Validation loss:', score_yaml[0])
-    print('yaml Validation accuracy:', score_yaml[1])
-    print("yaml %s: %.2f%%" % (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))
-
-    # predict using loaded yaml model on test and training data
-    predict_yaml_train = loaded_model_yaml.predict(X_train)
-    predict_yaml_test = loaded_model_yaml.predict(X_test)
-    print('Yaml_train_shape:', predict_yaml_train.shape)
-    print('Yaml_test_shape:', predict_yaml_test.shape)
-
-    predict_yaml_train_classes = np.argmax(predict_yaml_train, axis=1)
-    predict_yaml_test_classes = np.argmax(predict_yaml_test, axis=1)
-    np.savetxt(params['save_path'] + root_fname + "_predict_yaml_train.csv", predict_yaml_train, delimiter=",", fmt="%.3f")
-    np.savetxt(params['save_path'] + root_fname + "_predict_yaml_test.csv", predict_yaml_test, delimiter=",", fmt="%.3f")
-
-    np.savetxt(params['save_path'] + root_fname + "_predict_yaml_train_classes.csv", predict_yaml_train_classes, delimiter=",", fmt="%d")
-    np.savetxt(params['save_path'] + root_fname + "_predict_yaml_test_classes.csv", predict_yaml_test_classes, delimiter=",", fmt="%d")
+    # predict using loaded model on test and training data
+    predict_train = loaded_model_json.predict(X_train)
+    predict_test = loaded_model_json.predict(X_test)
+    print('train_shape:', predict_train.shape)
+    print('test_shape:', predict_test.shape)
+
+    predict_train_classes = np.argmax(predict_train, axis=1)
+    predict_test_classes = np.argmax(predict_test, axis=1)
+    np.savetxt(params['save_path'] + root_fname + "_predict_train.csv", predict_train, delimiter=",", fmt="%.3f")
+    np.savetxt(params['save_path'] + root_fname + "_predict_test.csv", predict_test, delimiter=",", fmt="%.3f")
+
+    np.savetxt(params['save_path'] + root_fname + "_predict_train_classes.csv", predict_train_classes, delimiter=",", fmt="%d")
+    np.savetxt(params['save_path'] + root_fname + "_predict_test_classes.csv", predict_test_classes, delimiter=",", fmt="%d")


 def main():
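
Note: both Attn scripts switch compute_class_weight to keyword arguments. scikit-learn deprecated the positional form (around 0.24) and later made these arguments keyword-only, so only the keyword call keeps working. A self-contained toy example:

    # Toy illustration of the keyword-only compute_class_weight call;
    # assumes a reasonably recent scikit-learn.
    import numpy as np
    from sklearn.utils.class_weight import compute_class_weight

    y = np.array([0, 0, 0, 1, 1, 2])  # imbalanced labels: counts 3/2/1
    class_weights = compute_class_weight(class_weight='balanced',
                                         classes=np.unique(y), y=y)
    d_class_weights = dict(enumerate(class_weights))
    print(d_class_weights)  # approx {0: 0.67, 1: 1.0, 2: 2.0}, i.e. n/(k*count)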

Pilot1/Attn/test.sh (+2 -1)

@@ -1,3 +1,4 @@
+set -x
 python attn_baseline_keras2.py
-# python attn_abstention_keras2.py --epochs 1
+python attn_abstention_keras2.py --epochs 1
 python attn_bin_working_jan7_h5.py --in ../../Data/Pilot1/top_21_1fold_001.h5 --ep 1 --save_dir "./save"

Pilot1/Combo/NCI60.py (+10 -13)

@@ -2,7 +2,6 @@

 import collections
 import os
-import sys

 import numpy as np
 import pandas as pd

@@ -14,8 +13,6 @@
 from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler

 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
-sys.path.append(lib_path)

 import candle

@@ -286,7 +283,7 @@ def load_drug_set_descriptors(drug_set='ALMANAC', ncols=None, scaling='std', add
     # df1['NAME'] = df1['NAME'].map(lambda x: x[4:])
     df1.rename(columns={'NAME': 'Drug'}, inplace=True)

-    df2 = df.drop('NAME', 1)
+    df2 = df.drop('NAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('dragon7.')

@@ -336,7 +333,7 @@ def load_drug_descriptors_new(ncols=None, scaling='std', add_prefix=True):
     # df1['NAME'] = df1['NAME'].map(lambda x: x[4:])
     df1.rename(columns={'NAME': 'Drug'}, inplace=True)

-    df2 = df.drop('NAME', 1)
+    df2 = df.drop('NAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('dragon7.')

@@ -383,7 +380,7 @@ def load_drug_descriptors(ncols=None, scaling='std', add_prefix=True):
     df1['NAME'] = df1['NAME'].map(lambda x: x[4:])
     df1.rename(columns={'NAME': 'NSC'}, inplace=True)

-    df2 = df.drop('NAME', 1)
+    df2 = df.drop('NAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('dragon7.')

@@ -427,7 +424,7 @@ def load_drug_descriptors_old(ncols=None, scaling='std', add_prefix=True):
     df1 = pd.DataFrame(df.loc[:, 'NAME'].astype(int).astype(str))
     df1.rename(columns={'NAME': 'NSC'}, inplace=True)

-    df2 = df.drop('NAME', 1)
+    df2 = df.drop('NAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('dragon7.')

@@ -489,7 +486,7 @@ def load_sample_rnaseq(ncols=None, scaling='std', add_prefix=True, use_landmark_

     df1 = df['Sample']

-    df2 = df.drop('Sample', 1)
+    df2 = df.drop('Sample', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('rnaseq.')

@@ -541,7 +538,7 @@ def load_cell_expression_rnaseq(ncols=None, scaling='std', add_prefix=True, use_
     df1 = df['CELLNAME']
     df1 = df1.map(lambda x: x.replace(':', '.'))

-    df2 = df.drop('CELLNAME', 1)
+    df2 = df.drop('CELLNAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('rnaseq.')

@@ -589,7 +586,7 @@ def load_cell_expression_u133p2(ncols=None, scaling='std', add_prefix=True, use_
     df1 = df['CELLNAME']
     df1 = df1.map(lambda x: x.replace(':', '.'))

-    df2 = df.drop('CELLNAME', 1)
+    df2 = df.drop('CELLNAME', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('expr.')

@@ -639,7 +636,7 @@ def load_cell_expression_5platform(ncols=None, scaling='std', add_prefix=True, u
     df1 = df['CellLine']
     df1.name = 'CELLNAME'

-    df2 = df.drop('CellLine', 1)
+    df2 = df.drop('CellLine', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('expr_5p.')

@@ -680,7 +677,7 @@ def load_cell_mirna(ncols=None, scaling='std', add_prefix=True):
     df1 = df['CellLine']
     df1.name = 'CELLNAME'

-    df2 = df.drop('CellLine', 1)
+    df2 = df.drop('CellLine', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('mRNA.')

@@ -775,7 +772,7 @@ def load_drug_autoencoded_AG(ncols=None, scaling='std', add_prefix=True):
     global_cache[path] = df

     df1 = pd.DataFrame(df.loc[:, 'NSC'].astype(int).astype(str))
-    df2 = df.drop('NSC', 1)
+    df2 = df.drop('NSC', axis=1)
     if add_prefix:
         df2 = df2.add_prefix('smiles_latent_AG.')
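
Note: every NCI60.py hunk is the same mechanical migration. pandas deprecated passing axis positionally to DataFrame.drop (newer releases reject it), so df.drop('NAME', 1) becomes df.drop('NAME', axis=1). A toy example:

    # Toy illustration of the axis-keyword form; assumes a recent pandas.
    import pandas as pd

    df = pd.DataFrame({'NAME': ['a', 'b'], 'x': [1, 2], 'y': [3, 4]})
    df2 = df.drop('NAME', axis=1)     # was: df.drop('NAME', 1)
    # df.drop(columns='NAME') is an equivalent spelling
    print(list(df2.columns))          # ['x', 'y']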

Pilot1/Combo/combo.py (-3)

@@ -1,12 +1,9 @@
 from __future__ import print_function

 import os
-import sys
 import logging

 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
-sys.path.append(lib_path2)

 import candle

Pilot1/Combo/combo_baseline_keras2.py (+2 -1)

@@ -4,6 +4,7 @@

 import collections
 import logging
+import sys
 import os
 import threading

@@ -46,7 +47,7 @@ def set_up_logger(logfile, verbose):
     fh.setFormatter(logging.Formatter("[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
     fh.setLevel(logging.DEBUG)

-    sh = logging.StreamHandler()
+    sh = logging.StreamHandler(sys.stdout)
     sh.setFormatter(logging.Formatter(''))
     sh.setLevel(logging.DEBUG if verbose else logging.INFO)
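
Note: logging.StreamHandler() defaults to sys.stderr; passing sys.stdout (hence the new import sys) routes console log output to standard output instead, which batch schedulers typically capture in the main job log. combo_dose.py below gets the identical change. Minimal illustration:

    # Minimal illustration: route console logging to stdout rather than
    # the default stderr.
    import logging
    import sys

    logger = logging.getLogger('combo')
    logger.setLevel(logging.DEBUG)

    sh = logging.StreamHandler(sys.stdout)   # StreamHandler() would use stderr
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.INFO)
    logger.addHandler(sh)

    logger.info('this line goes to stdout')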

Pilot1/Combo/combo_dose.py (+2 -1)

@@ -6,6 +6,7 @@
 import collections
 import logging
 import os
+import sys
 import threading

 import numpy as np

@@ -49,7 +50,7 @@ def set_up_logger(logfile, verbose):
     fh.setFormatter(logging.Formatter("[%(asctime)s %(process)d] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
     fh.setLevel(logging.DEBUG)

-    sh = logging.StreamHandler()
+    sh = logging.StreamHandler(sys.stdout)
     sh.setFormatter(logging.Formatter(''))
     sh.setLevel(logging.DEBUG if verbose else logging.INFO)

Pilot1/Combo/test.sh (+3 -1)

@@ -1,7 +1,9 @@
 #!/bin/bash

+set -x
+
 python combo_baseline_keras2.py --use_landmark_genes True --warmup_lr True --reduce_lr True -z 256 --epochs 4
-python infer.py --sample_set NCIPDM --drug_set ALMANAC --use_landmark_genes -m ./save/combo.A=relu.B=256.E=10.O=adam.LR=None.CF=e.DF=d.wu_lr.re_lr.L1000.D1=1000.D2=1000.D3=1000.model.h5 -w ./save/combo.A=relu.B=256.E=10.O=adam.LR=None.CF=e.DF=d.wu_lr.re_lr.L1000.D1=1000.D2=1000.D3=1000.weights.h5 --epochs 4
+python infer.py --sample_set NCIPDM --drug_set ALMANAC --use_landmark_genes -m ./save/combo.A=relu.B=256.E=4.O=adam.LR=None.CF=e.DF=d.wu_lr.re_lr.L1000.D1=1000.D2=1000.D3=1000.model.h5 -w ./save/combo.A=relu.B=256.E=4.O=adam.LR=None.CF=e.DF=d.wu_lr.re_lr.L1000.D1=1000.D2=1000.D3=1000.weights.h5

 # Need to revisit combo_dose.py and infer_dose.py
 # python combo_dose.py --use_landmark_genes True -z 256

Pilot1/NT3/abstain_functions.py (+3)

@@ -45,6 +45,9 @@
      'nargs': '+',
      'type': int,
      'help': 'list of names corresponding to each task to use'},
+    {'name': 'cf_noise',
+     'type': str,
+     'help': 'input file with cf noise'}
 ]
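
Note: the added entry follows the repo's list-of-dicts convention for extra benchmark parameters, which the scripts hand to the CANDLE benchmark machinery. As a hypothetical sketch (plain argparse, not the CANDLE parser itself), such definitions map onto command-line flags like this:

    # Hypothetical sketch: mapping dict-style definitions onto argparse.
    import argparse

    definitions = [
        {'name': 'cf_noise', 'type': str, 'help': 'input file with cf noise'},
    ]

    parser = argparse.ArgumentParser()
    for d in definitions:
        kwargs = {k: v for k, v in d.items() if k != 'name'}
        parser.add_argument('--' + d['name'], **kwargs)

    args = parser.parse_args(['--cf_noise', 'noise.csv'])
    print(args.cf_noise)  # noise.csv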
