From b4c2f0a157f643e8fd7deab6c5762f3eedb1bde5 Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 15:09:59 -0500 Subject: [PATCH 1/8] update set args --- configs/mirai_base.json | 6 ------ configs/mirai_full.json | 16 ---------------- demo/finetune_end2end.sh | 0 onconet/datasets/csv_mammo_cancer.py | 5 ++++- 4 files changed, 4 insertions(+), 23 deletions(-) create mode 100644 demo/finetune_end2end.sh diff --git a/configs/mirai_base.json b/configs/mirai_base.json index 66bf0e4..80b58f5 100644 --- a/configs/mirai_base.json +++ b/configs/mirai_base.json @@ -4,8 +4,6 @@ "batch_splits": [2], "max_followup":[5], "cuda": [true], - "predict_birads":[false], - "use_c_view_if_available": [false], "dataset": ["mgh_mammo_risk_full_future"], "pred_risk_factors": [true], "use_pred_risk_factors_at_test": [false], @@ -15,12 +13,8 @@ "weight_decay": [5e-5], "momentum": [0.9], "epochs": [15], - "mask_prob": [0, 0.20], "lr_decay": [0.1], "img_dir": ["/home/administrator/Mounts/pngs16"], - "train_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "dev_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "test_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], "img_mean": [7047.99], "img_size": [[1664, 2048]], "img_std": [12005.5], diff --git a/configs/mirai_full.json b/configs/mirai_full.json index 955a955..43dd565 100644 --- a/configs/mirai_full.json +++ b/configs/mirai_full.json @@ -4,19 +4,13 @@ "cuda": [true], "predict_birads":[false], "dataset": ["mgh_mammo_risk_full_future_all_images_both_sides"], - "pred_both_sides":[false], "pred_risk_factors": [true], "use_pred_risk_factors_at_test": [true], "survival_analysis_setup": [true], - "use_c_view_if_available": [false], "num_images": [4], "min_num_images":[4], "batch_size": [64], "pred_risk_factors_lambda": [0.2, 0.5], - "pred_missing_mammos":[false], - "also_pred_given_mammos": [false], - "mask_prob": [0, 0.2], - "pred_missing_mammos_lambda":[0], "cluster_exams": [true], "weight_decay": 
[5e-05], "momentum": [0.9], @@ -26,9 +20,6 @@ "transfomer_hidden_dim": [512, 1024], "num_heads": [8,16], "img_dir": ["/home/administrator/Mounts/pngs16"], - "train_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "dev_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "test_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], "img_mean": [7047.99], "img_size": [[1664, 2048]], "img_std": [12005.5], @@ -38,33 +29,26 @@ "use_precomputed_hiddens": [true], "hiddens_results_path": ["/Mounts/Isilon/precomputed_hiddens/hiddens_mirai_base_jan08.p"], "pool_name": ["GlobalMaxPool", "Simple_AttentionPool", "GlobalAvgPool"], - "replace_bn_with_gn":[false], "num_chan": [3], "tuning_metric": ["c_index"], "num_workers": [12], "objective": ["cross_entropy"], "optimizer": ["adam"], "patience": [10], - "use_region_annotation": [false], "use_adv": [true], - "use_mmd_adv":[false], - "use_temporal_mmd":[false], "adv_loss_lambda": [1], "num_adv_steps": [3], "train_adv_seperate":[true], "max_batches_per_train_epoch": [1500], "max_batches_per_dev_epoch": [15000], - "mask_mechanism": ["default", "slice", "indep", "linear"], "run_prefix": ["snapshot"], "save_dir": ["snapshot/"], "train": [true], "test": [true], "class_bal": [true], - "year_weighted_class_bal": [false], "resume": [false], "image_transformers": ["scale_2d align_to_left rand_ver_flip rotate_range/min=-20/max=20"], "tensor_transformers": ["force_num_chan_2d normalize_2d"], - "shift_class_bal_towards_imediate_cancers": [false], "test_image_transformers": [["scale_2d align_to_left"]], "test_tensor_transformers": [["force_num_chan_2d normalize_2d"]], "ignore_warnings": [false], diff --git a/demo/finetune_end2end.sh b/demo/finetune_end2end.sh new file mode 100644 index 0000000..e69de29 diff --git a/onconet/datasets/csv_mammo_cancer.py b/onconet/datasets/csv_mammo_cancer.py index a6f6e7d..906bc59 100644 --- a/onconet/datasets/csv_mammo_cancer.py +++ b/onconet/datasets/csv_mammo_cancer.py @@ -28,7 +28,7 @@ 
def create_dataset(self, split_group, img_dir): :split_group: - ['train'|'dev'|'test']. :img_dir: - The path to the dir containing the images. """ - + dict_dataset = defaultdict(dict) for _row in self.metadata_json: @@ -151,8 +151,11 @@ def set_args(args): args.num_images = 4 args.multi_image = True args.min_num_images = 4 + args.class_bal = True args.test_image_transformers = ["scale_2d", "align_to_left"] args.test_tensor_transformers = ["force_num_chan_2d", "normalize_2d"] + args.image_transformers = ["scale_2d", "align_to_left", "rand_ver_flip", "rotate_range/min=-20/max=20"] + args.tensor_transformers = ["force_num_chan_2d", "normalize_2d"] @property def task(self): From 4315eff4a69bcd80daa0845bd3fa8cf2fbfcc146 Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 15:16:37 -0500 Subject: [PATCH 2/8] debug rf pool --- onconet/models/pools/risk_factor_pool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onconet/models/pools/risk_factor_pool.py b/onconet/models/pools/risk_factor_pool.py index c169f2c..87ebdd4 100644 --- a/onconet/models/pools/risk_factor_pool.py +++ b/onconet/models/pools/risk_factor_pool.py @@ -54,7 +54,7 @@ def forward(self, x, risk_factors): if not self.training and self.args.use_pred_risk_factors_if_unk: is_rf_known = (torch.sum(gold_rf, dim=-1) > 0).unsqueeze(-1).float() key_probs = (is_rf_known * gold_rf) + (1 - is_rf_known)*key_probs - elif self.training and self.args.mask_prob > 0: + elif self.training and self.args.mask_prob > 0 and gold_rf is not None: is_rf_known = np.random.random() > self.args.mask_prob key_probs = (is_rf_known * gold_rf) + (1 - is_rf_known) * key_probs From 7c68d957559d0a30b477280ab0592dea3d3e9946 Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 15:17:07 -0500 Subject: [PATCH 3/8] update finetune_end2end script --- demo/finetune_end2end.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/demo/finetune_end2end.sh b/demo/finetune_end2end.sh index e69de29..65d7411 100644 
--- a/demo/finetune_end2end.sh +++ b/demo/finetune_end2end.sh @@ -0,0 +1 @@ +python scripts/main.py --cuda --model_name mirai_full --img_encoder_snapshot snapshots/mgh_mammo_MIRAI_Base_May20_2019.p --transformer_snapshot snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p --callibrator_snapshot snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p --batch_size 1 --dataset csv_mammo_risk_all_full_future --img_mean 7047.99 --img_size 1664 2048 --img_std 12005.5 --metadata_path demo/sample_metadata.csv --train --epochs 1 --init_lr 1e-6 \ No newline at end of file From 6cbd4eb49cbc16c7361fc8ce076112c9d433e5d9 Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 15:20:22 -0500 Subject: [PATCH 4/8] add support to fine tune only image aggregator --- demo/finetune_image_aggregator.sh | 1 + onconet/models/mirai_full.py | 4 ++++ onconet/utils/parsing.py | 1 + 3 files changed, 6 insertions(+) diff --git a/demo/finetune_image_aggregator.sh b/demo/finetune_image_aggregator.sh index e69de29..1ffae3f 100644 --- a/demo/finetune_image_aggregator.sh +++ b/demo/finetune_image_aggregator.sh @@ -0,0 +1 @@ +python scripts/main.py --cuda --model_name mirai_full --img_encoder_snapshot snapshots/mgh_mammo_MIRAI_Base_May20_2019.p --transformer_snapshot snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p --callibrator_snapshot snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p --batch_size 1 --dataset csv_mammo_risk_all_full_future --img_mean 7047.99 --img_size 1664 2048 --img_std 12005.5 --metadata_path demo/sample_metadata.csv --train --epochs 1 --freeze_image_encoder --init_lr 1e-6 \ No newline at end of file diff --git a/onconet/models/mirai_full.py b/onconet/models/mirai_full.py index 9c9cdce..b79a724 100644 --- a/onconet/models/mirai_full.py +++ b/onconet/models/mirai_full.py @@ -16,6 +16,10 @@ def __init__(self, args): else: self.image_encoder = get_model_by_name('custom_resnet', False, args) + if self.args.freeze_image_encoder: + for param in 
self.image_encoder.parameters(): + param.requires_grad = False + self.image_repr_dim = self.image_encoder._model.args.img_only_dim if args.transformer_snapshot is not None: self.transformer = load_model(args.transformer_snapshot, args, do_wrap_model=False) diff --git a/onconet/utils/parsing.py b/onconet/utils/parsing.py index 5ca8e9f..42d2ddd 100644 --- a/onconet/utils/parsing.py +++ b/onconet/utils/parsing.py @@ -313,6 +313,7 @@ def parse_args(): parser.add_argument('--snapshot', type=str, default=None, help='filename of model snapshot to load[default: None]') parser.add_argument('--state_dict_path', type=str, default=None, help='filename of model snapshot to load[default: None]') parser.add_argument('--img_encoder_snapshot', type=str, default=None, help='filename of img_feat_extractor model snapshot to load. Only used for mirai_full type models [default: None]') + parser.add_argument('--freeze_image_encoder', action='store_true', default=False, help='Whether freeze image_encoder.') # parser.add_argument('--transformer_snapshot', type=str, default=None, help='filename of transformer model snapshot to load. Only used for mirai_full type models [default: None]') parser.add_argument('--callibrator_snapshot', type=str, default=None, help='filename of callibrator. Produced for a single model on development set using Platt Scaling') parser.add_argument('--patch_snapshot', type=str, default=None, help='filename of patch model snapshot to load. 
Only used for aggregator type models [default: None]') From 0ee7e25c0fc69f377d7e30d79afaf892d44c2251 Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 15:56:04 -0500 Subject: [PATCH 5/8] remove twilio depedency --- requirements.txt | 1 - scripts/dispatcher.py | 43 +++++-------------------------------------- 2 files changed, 5 insertions(+), 39 deletions(-) diff --git a/requirements.txt b/requirements.txt index 31ea870..9997ec3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,6 @@ torchvision==0.2.0 sklearn scipy numpy -twilio Pillow tqdm mock diff --git a/scripts/dispatcher.py b/scripts/dispatcher.py index 44a0581..8f5dc2d 100644 --- a/scripts/dispatcher.py +++ b/scripts/dispatcher.py @@ -4,7 +4,6 @@ import multiprocessing import pickle import csv -from twilio.rest import Client import json import sys from os.path import dirname, realpath @@ -31,31 +30,13 @@ parser = argparse.ArgumentParser(description='OncoNet Grid Search Dispatcher. For use information, see `doc/README.md`') parser.add_argument("--experiment_config_path", required=True, type=str, help="Path of experiment config") -parser.add_argument("--alert_config_path", type=str, default='configs/alert_config.json', help="Path of alert config") parser.add_argument('--log_dir', type=str, default="logs", help="path to store logs and detailed job level result files") parser.add_argument('--result_path', type=str, default="results/grid_search.csv", help="path to store grid_search table. This is preferably on shared storage") parser.add_argument('--rerun_experiments', action='store_true', default=False, help='whether to rerun experiments with the same result file location') parser.add_argument('--shuffle_experiment_order', action='store_true', default=False, help='whether to shuffle order of experiments') -def send_text_msg(msg, alert_config, twilio_config): - ''' - Send a text message using twilio acct specified twilio conf to numbers - specified in alert_conf. 
- If suppress_alerts is turned on, do nothing - :msg: - body of text message - :alert_config: - dictionary with a list fo numbers to send message to - :twilio-config: - dictionary with twilio SID, TOKEN, and phone number - ''' - if alert_config['suppress_alerts']: - return - client = Client(twilio_config['ACCOUNT_SID'], twilio_config['AUTH_TOKEN']) - for number in [alert_config['alert_nums']]: - client.messages.create( - to=number, from_=twilio_config['twilio_num'], body=msg) - - -def launch_experiment(gpu, flag_string, alert_conf, twilio_conf): +def launch_experiment(gpu, flag_string): ''' Launch an experiment and direct logs and results to a unique filepath. Alert of something goes wrong. @@ -89,12 +70,12 @@ def launch_experiment(gpu, flag_string, alert_conf, twilio_conf): if not os.path.exists(results_path): # running this process failed, alert me job_fail_msg = EXPERIMENT_CRASH_MSG.format(experiment_string, log_path) - send_text_msg(job_fail_msg, alert_conf, twilio_conf) + print(job_fail_msg) return results_path, log_path -def worker(gpu, job_queue, done_queue, alert_config, twilio_config): +def worker(gpu, job_queue, done_queue): ''' Worker thread for each gpu. Consumes all jobs and pushes results to done_queue. :gpu - gpu this worker can access. 
@@ -106,7 +87,7 @@ def worker(gpu, job_queue, done_queue, alert_config, twilio_config): if params is None: return done_queue.put( - launch_experiment(gpu, params, alert_config, twilio_config)) + launch_experiment(gpu, params)) def update_sumary_with_results(result_path, log_path, experiment_axies, summary): assert result_path is not None @@ -181,19 +162,6 @@ def update_sumary_with_results(result_path, log_path, experiment_axies, summary print(RESULTS_PATH_APPEAR_ERR) sys.exit(1) - if not os.path.exists(args.alert_config_path): - print(CONFIG_NOT_FOUND_MSG.format("alert", args.alert_config_path)) - sys.exit(1) - alert_config = json.load(open(args.alert_config_path, 'r')) - - twilio_conf_path = alert_config['path_to_twilio_secret'] - if not os.path.exists(twilio_conf_path): - print(CONFIG_NOT_FOUND_MSG.format("twilio", twilio_conf_path)) - - twilio_config = None - if not alert_config['suppress_alerts']: - twilio_config = json.load(open(twilio_conf_path, 'r')) - job_list, experiment_axies = parsing.parse_dispatcher_config(experiment_config) if args.shuffle_experiment_order: random.shuffle(job_list) @@ -206,7 +174,7 @@ def update_sumary_with_results(result_path, log_path, experiment_axies, summary print() for gpu in experiment_config['available_gpus']: print("Start gpu worker {}".format(gpu)) - multiprocessing.Process(target=worker, args=(gpu, job_queue, done_queue, alert_config, twilio_config)).start() + multiprocessing.Process(target=worker, args=(gpu, job_queue, done_queue)).start() print() summary = [] @@ -219,4 +187,3 @@ def update_sumary_with_results(result_path, log_path, experiment_axies, summary summary = update_sumary_with_results(result_path, log_path, experiment_axies, summary) dump_result_string = SUCESSFUL_SEARCH_STR.format(args.result_path) print("({}/{}) \t {}".format(i+1, len(job_list), dump_result_string)) - send_text_msg(dump_result_string, alert_config, twilio_config) From f1ab32617b09ff30b7a1dac6673510a99f0901dd Mon Sep 17 00:00:00 2001 From: Adam 
Yala Date: Wed, 25 Nov 2020 16:06:54 -0500 Subject: [PATCH 6/8] add config for finetune search --- configs/eval_mirai_full.json | 65 ---------------------------------- configs/finetune_mirai.json | 25 +++++++++++++ configs/validate_mirai.json | 21 +++++++++++ demo/finetune_image_encoder.sh | 0 scripts/main.py | 8 +++-- 5 files changed, 52 insertions(+), 67 deletions(-) delete mode 100644 configs/eval_mirai_full.json create mode 100644 configs/finetune_mirai.json create mode 100644 configs/validate_mirai.json delete mode 100644 demo/finetune_image_encoder.sh diff --git a/configs/eval_mirai_full.json b/configs/eval_mirai_full.json deleted file mode 100644 index 0e7def7..0000000 --- a/configs/eval_mirai_full.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "search_space": { - "model_name":["mirai_full"], - "img_encoder_snapshot":["snapshots/mgh_mammo_MIRAI_Base_May20_2019.p"], - "transformer_snapshot":["snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p"], - "metadata_dir":["/data/rsg/mammogram/BMCS_data/"], - "batch_splits": [1], - "batch_size": [2], - "cuda": [true], - "predict_birads":[false], - "dataset": ["bmcs_all_full_future", "bmcs_ge_all_full_future", "bmcs_ge_essential_full_future", "bmcs_hologic_full_future"], - "pred_both_sides":[false], - "pred_risk_factors": [true], - "use_pred_risk_factors_at_test": [true], - "survival_analysis_setup": [true], - "num_images": [4], - "multi_image":[true], - "min_num_images":[4], - "batch_size": [4], - "pred_risk_factors_lambda": [0.1], - "pred_missing_mammos":[false], - "also_pred_given_mammos": [false], - "mask_prob": [0], - "pred_missing_mammos_lambda":[0], - "cluster_exams": [false], - "weight_decay": [5e-05], - "momentum": [0.9], - "epochs": [15], - "lr_decay": [0.1], - "train_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "dev_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "test_years":[[2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]], - "img_mean": [7047.99], - "img_size": [[1664, 
2048]], - "img_std": [12005.5], - "init_lr": [1e-03], - "num_chan": [3], - "tuning_metric": ["c_index"], - "num_workers": [24], - "objective": ["cross_entropy"], - "optimizer": ["adam"], - "train": [false], - "dev":[false], - "test": [true], - "class_bal": [true], - "year_weighted_class_bal": [false], - "resume": [false], - "image_transformers": ["scale_2d align_to_left rand_ver_flip rotate_range/min=-20/max=20"], - "tensor_transformers": ["force_num_chan_2d normalize_2d"], - "shift_class_bal_towards_imediate_cancers": [false], - "test_image_transformers": [["scale_2d align_to_left"]], - "test_tensor_transformers": [["force_num_chan_2d normalize_2d"]], - "ignore_warnings": [false], - "data_parallel": [false], - "model_parallel": [false], - "num_gpus":[1], - "num_shards":[1], - "use_risk_factors": [false], - "dropout": [0], - "max_followup": [5], - "risk_factor_metadata_path": ["/Mounts/Isilon/metadata/risk_factors_aug06_2018_mammo_and_mri.json"], - "risk_factor_keys": ["density binary_family_history binary_biopsy_benign binary_biopsy_LCIS binary_biopsy_atypical_hyperplasia age menarche_age menopause_age first_pregnancy_age prior_hist race parous menopausal_status weight height ovarian_cancer ovarian_cancer_age ashkenazi brca mom_bc_cancer_history m_aunt_bc_cancer_history p_aunt_bc_cancer_history m_grandmother_bc_cancer_history p_grantmother_bc_cancer_history sister_bc_cancer_history mom_oc_cancer_history m_aunt_oc_cancer_history p_aunt_oc_cancer_history m_grandmother_oc_cancer_history p_grantmother_oc_cancer_history sister_oc_cancer_history hrt_type hrt_duration hrt_years_ago_stopped"] - }, - "available_gpus": [0,1,2,3] -} diff --git a/configs/finetune_mirai.json b/configs/finetune_mirai.json new file mode 100644 index 0000000..bcf5517 --- /dev/null +++ b/configs/finetune_mirai.json @@ -0,0 +1,25 @@ +{ + "search_space": { + "model_name":["mirai_full"], + "img_encoder_snapshot":["snapshots/mgh_mammo_MIRAI_Base_May20_2019.p"], + 
"transformer_snapshot":["snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p"], + "batch_size": [32], + "batch_splits": [16], + "cuda": [true], + "dataset": ["csv_mammo_risk_all_full_future"], + "metadata_path": ["demo/sample_metadata.csv"], + "img_mean": [7047.99], + "img_size": [[1664, 2048]], + "img_std": [12005.5], + "num_workers": [24], + "train": [true], + "dev":[false], + "test": [true], + "init_lr": [1e-6, 1e-5, 1e-4], + "epochs": [15], + "dropout": [0, 0.1], + "weight_decay": [5e-05], + "freeze_image_encoder": [true, false] + }, + "available_gpus": [0,1,2,3] +} diff --git a/configs/validate_mirai.json b/configs/validate_mirai.json new file mode 100644 index 0000000..f8a8d68 --- /dev/null +++ b/configs/validate_mirai.json @@ -0,0 +1,21 @@ +{ + "search_space": { + "model_name":["mirai_full"], + "img_encoder_snapshot":["snapshots/mgh_mammo_MIRAI_Base_May20_2019.p"], + "transformer_snapshot":["snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p"], + "callibrator_snapshot": ["snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p"], + "prediction_save_path": ["demo/validation_output.csv"], + "batch_size": [1], + "cuda": [true], + "dataset": ["csv_mammo_risk_all_full_future"], + "metadata_path": ["demo/sample_metadata.csv"], + "img_mean": [7047.99], + "img_size": [[1664, 2048]], + "img_std": [12005.5], + "num_workers": [24], + "train": [false], + "dev":[false], + "test": [true] + }, + "available_gpus": [0,1,2,3] +} diff --git a/demo/finetune_image_encoder.sh b/demo/finetune_image_encoder.sh deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/main.py b/scripts/main.py index d2cf418..d2e082a 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -120,7 +120,8 @@ exams.extend( args.test_stats['exams']) probs.extend( args.test_stats['probs']) legend = ['patient_exam_id'] - callibrator = pickle.load(open(args.callibrator_snapshot,'rb')) + if args.callibrator_snapshot is not None: + callibrator = 
pickle.load(open(args.callibrator_snapshot,'rb')) for i in range(args.max_followup): legend.append("{}_year_risk".format(i+1)) export = {} @@ -132,7 +133,10 @@ for i in range(args.max_followup): key = "{}_year_risk".format(i+1) raw_val = arr[i] - val = callibrator[i].predict_proba([[raw_val]])[0,1] + if args.callibrator_snapshot is not None: + val = callibrator[i].predict_proba([[raw_val]])[0,1] + else: + val = raw_val export[key] = val writer.writerow(export) print("Exported predictions to {}".format(args.prediction_save_path)) From 76586af1311bf7c894d7d6c3999b3951017cf44a Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 16:18:40 -0500 Subject: [PATCH 7/8] add finetune docs --- README.md | 22 ++++++++++++++++++++-- configs/finetune_mirai.json | 2 +- demo/finetune_end2end.sh | 2 +- demo/finetune_image_aggregator.sh | 1 - 4 files changed, 22 insertions(+), 5 deletions(-) delete mode 100644 demo/finetune_image_aggregator.sh diff --git a/README.md b/README.md index a9381b5..0ee9cd4 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,11 @@ The grid searches are shown in : The grid searches were run using our job-dispatcher, as shown bellow. -`python scripts/dispatcher.py --alert_config_path /path/to/secret_for_sms.json --experiment_config_path configs/mirai_base.json --result_path mirai_base_sweep.csv` +`python scripts/dispatcher.py --experiment_config_path configs/mirai_base.json --result_path mirai_base_sweep.csv` We selected the image encoder with the highest C-index on the development set, and leveraged it for the second stage hyper-parameter sweep. 
-`python scripts/dispatcher.py --alert_config_path /path/to/secret_for_sms.json --experiment_config_path configs/mirai_full.json --result_path mirai_full_sweep.csv` +`python scripts/dispatcher.py --experiment_config_path configs/mirai_full.json --result_path mirai_full_sweep.csv` We note that this command run relies on integrations that were specific to the MGH data, and so the exact line above will not run on your system. The configs above are meant to specify exact implementation details and our experimental procedure. @@ -55,6 +55,11 @@ The full bash command (inside the validate.sh file) is: python scripts/main.py --model_name mirai_full --img_encoder_snapshot snapshots/mgh_mammo_MIRAI_Base_May20_2019.p --transformer_snapshot snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p --callibrator_snapshot snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p --batch_size 1 --dataset csv_mammo_risk_all_full_future --img_mean 7047.99 --img_size 1664 2048 --img_std 12005.5 --metadata_path demo/sample_metadata.csv --test --prediction_save_path demo/validation_output.csv ``` +Alternatively, you could launch the same validation script using our job-dispatcher with the following command: +``` +python scripts/dispatcher.py --experiment_config_path configs/validate_mirai.json --result_path finetune_results.csv +``` + What you need to validate the model: - Install the dependencies (see above) - Get access to the snapshot files (email adamyala@mit.edu) @@ -69,7 +74,20 @@ What you need to validate the model: - `years_to_last_followup`: Integer reflecting how many years from the mammogram we know the patient is cancer free. For example, if a patient had a negative mammogram in 2010 (and this row corresponds to that mammogram), and we have negative followup until 2020, then enter 10. - `split_group`: Can take values `train`, `dev` or `test` to note the training, validation and testing samples. 
+Before running `validate.sh`, make sure to replace `demo/sample_metadata.csv` with the path to your metadata file and to replace `demo/validation_output.csv` with wherever you want predictions to be saved. + After running `validate.sh`, our code-base will print out the AUC for each time-point and save the predictions for each mammogram in `prediction_save_path`. For an example of the output file format, see `demo/validation_output.csv`. The key `patient_exam_id` is defined as `patient_id \tab exam_id`. ## How to fine-tune the model +To finetune Mirai, you can use the following command: `sh demo/finetune.sh` +The full bash command (inside that script) is: + +``` +python scripts/dispatcher.py --experiment_config_path configs/finetune_mirai.json --result_path finetune_results.csv +``` + +This creates a grid search over possible fine-tuning hyperparameters (see `configs/finetune_mirai.json`) and launches jobs across the available GPUs (as defined in `available_gpus`). The results will be summarized in `finetune_results.csv` or wherever you set `result_path`. We note that each job launches just a shell command. By editing `configs/finetune_mirai.json` or creating your own config json file, you can explore any hyper-parameters or architecture supported in the code base. + +To finetune the model, you will need the same dependencies, preprocessing and CSV file as listed above to validate Mirai. We recommend you first evaluate Mirai before you try to finetune it.
+ diff --git a/configs/finetune_mirai.json b/configs/finetune_mirai.json index bcf5517..ff58067 100644 --- a/configs/finetune_mirai.json +++ b/configs/finetune_mirai.json @@ -21,5 +21,5 @@ "weight_decay": [5e-05], "freeze_image_encoder": [true, false] }, - "available_gpus": [0,1,2,3] + "available_gpus": [0] } diff --git a/demo/finetune_end2end.sh b/demo/finetune_end2end.sh index 65d7411..d53f87d 100644 --- a/demo/finetune_end2end.sh +++ b/demo/finetune_end2end.sh @@ -1 +1 @@ -python scripts/main.py --cuda --model_name mirai_full --img_encoder_snapshot snapshots/mgh_mammo_MIRAI_Base_May20_2019.p --transformer_snapshot snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p --callibrator_snapshot snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p --batch_size 1 --dataset csv_mammo_risk_all_full_future --img_mean 7047.99 --img_size 1664 2048 --img_std 12005.5 --metadata_path demo/sample_metadata.csv --train --epochs 1 --init_lr 1e-6 \ No newline at end of file +python scripts/dispatcher.py --experiment_config_path configs/fine_tune_mirai.json --result_path finetune_results.csv \ No newline at end of file diff --git a/demo/finetune_image_aggregator.sh b/demo/finetune_image_aggregator.sh deleted file mode 100644 index 1ffae3f..0000000 --- a/demo/finetune_image_aggregator.sh +++ /dev/null @@ -1 +0,0 @@ -python scripts/main.py --cuda --model_name mirai_full --img_encoder_snapshot snapshots/mgh_mammo_MIRAI_Base_May20_2019.p --transformer_snapshot snapshots/mgh_mammo_cancer_MIRAI_Transformer_Jan13_2020.p --callibrator_snapshot snapshots/callibrators/MIRAI_FULL_PRED_RF.callibrator.p --batch_size 1 --dataset csv_mammo_risk_all_full_future --img_mean 7047.99 --img_size 1664 2048 --img_std 12005.5 --metadata_path demo/sample_metadata.csv --train --epochs 1 --freeze_image_encoder --init_lr 1e-6 \ No newline at end of file From aab661f2e8815c1cc0281e1a44ddd6c96febb66e Mon Sep 17 00:00:00 2001 From: Adam Yala Date: Wed, 25 Nov 2020 16:24:17 -0500 Subject: [PATCH 8/8] debug 
freeze integration --- onconet/models/mirai_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onconet/models/mirai_full.py b/onconet/models/mirai_full.py index b79a724..790917e 100644 --- a/onconet/models/mirai_full.py +++ b/onconet/models/mirai_full.py @@ -16,7 +16,7 @@ def __init__(self, args): else: self.image_encoder = get_model_by_name('custom_resnet', False, args) - if self.args.freeze_image_encoder: + if hasattr(self.args, "freeze_image_encoder") and self.args.freeze_image_encoder: for param in self.image_encoder.parameters(): param.requires_grad = False