# train_models.py

# -*- coding: utf-8 -*-
"""Script2Run.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/12g-9YEQ3pJnKFjmOr7pTvOcfGpzTskPv

ML FINAL PROJECT
=============================


TODO: add a protocol for saving models (save at each epoch)
and a file-naming protocol.
The hyperparameters will be:
- dataset size
- model name (resnet, alexnet, naive)
- data augmentation (none, some) #only on train set
- feature type (scratch, finetune, feature extract)
"""

# from google.colab import drive
# drive.mount('/content/drive')

# cd /content/drive/MyDrive/ML_Final_Project/

from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from copy import deepcopy
import random
import gc
import importlib

# Print the library versions in use at the start of the run.
print("PyTorch Version: ", torch.__version__)
print("Torchvision Version: ", torchvision.__version__)

import utils

# Reload *before* the star-import. importlib.reload() re-executes the module
# but does not rebind names that were previously copied out of it with
# `from utils import *`, so reloading afterwards would leave this namespace
# pointing at stale helpers when the script is re-run in a live session
# (e.g. a notebook) after utils.py has been edited.
importlib.reload(utils)
from utils import *

### This might crash a couple of times, but re-run until it works. 

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Fixed hyper-parameters shared by every run of the sweep.
num_classes = 4
batch_size = 10
num_epochs = 30

# Whether the transforms are built with augmentation enabled.
# Harsha = False, Gurion = True
data_augmentation = True

# Root folder of the images; the 'train' and 'val' splits are read from
# sub-folders of the same name.
data_dir = "data"

# get_transform receives level 1 when augmentation is on and level 0 otherwise.
level = 1 if data_augmentation else 0

# Per-split transforms, indexed below by split name.
data_transforms = get_transform(level, input_size=224)

# One ImageFolder dataset per split, keyed by the split name.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'val']
}

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


# Hyper-parameter sweep: for every combination of training-set size, model
# architecture and training regime, build a model, train it, and write one
# checkpoint file under Trained_Models/.

# Create the checkpoint directory up front so that a missing folder cannot
# make torch.save fail after a complete training run has already finished.
os.makedirs('Trained_Models', exist_ok=True)

# Maps each training regime to its (feature_extract, use_pretrained) flags.
# An unknown regime raises KeyError instead of silently reusing stale flags.
feature_settings = {
    'scratch': (False, False),
    'fine_tune': (False, True),
    'feature_extract': (True, True),
}

for train_size in [256, 1024, 4096]:  # TODO: Modulate this

    # Random subset of `train_size` training images, drawn without
    # replacement (np.random.choice raises ValueError if the training set has
    # fewer images than requested). The validation split is used in full.
    subsets = {}
    subsets['train'] = torch.utils.data.Subset(
        image_datasets['train'],
        np.random.choice(len(image_datasets['train']), train_size, replace=False))
    subsets['val'] = image_datasets['val']
    dataloaders_dict = {x: torch.utils.data.DataLoader(subsets[x], batch_size=batch_size,
                                                       shuffle=True) for x in ['train', 'val']}

    for model_name in ['resnet', 'alexnet']:  # TODO: Modulate this
        for feature_type in ['scratch', 'fine_tune', 'feature_extract']:  # TODO: Modulate this

            feature_extract, use_pretrained = feature_settings[feature_type]

            # initialize_model is not defined in this file; it is expected to
            # come from `utils` via the star-import.
            model_ft, input_size = initialize_model(model_name, num_classes,
                                                    feature_extract, use_pretrained=use_pretrained)

            # Free memory left over from the previous configuration before
            # moving the new model onto the device.
            gc.collect()
            torch.cuda.empty_cache()
            model_ft = model_ft.to(device)

            # In feature-extract mode only the parameters that still require
            # gradients are handed to the optimizer; otherwise all of them are.
            if feature_extract:
                params_to_update = [param for param in model_ft.parameters()
                                    if param.requires_grad]
            else:
                params_to_update = model_ft.parameters()

            optimizer_ft = optim.Adam(params_to_update, lr=1e-3)

            # Setup the loss fxn
            criterion = nn.CrossEntropyLoss()

            saving_name = '{}_DA{}_TS{}_FS{}.pth'.format(model_name, data_augmentation, train_size, feature_type)

            print('MODEL:{}'.format(saving_name))

            # Train and evaluate (train_model is likewise expected to come
            # from `utils`).
            model, val_acc_history, train_acc_history, state_dicts, time_elapsed = train_model(
                model_ft,
                dataloaders_dict,
                criterion, optimizer_ft,
                device,
                num_epochs=num_epochs)

            # Persist the weights together with the histories and the
            # settings that produced them.
            torch.save({
                'model': model_ft.state_dict(),
                'all_state_dicts': state_dicts,
                'val_hist': val_acc_history,
                'train_hist': train_acc_history,
                'batch_size': batch_size,
                'train_size': train_size,
                'model_type': model_name,
                'epochs': num_epochs,
                'data_augmentation': data_augmentation,
                'feature_type': feature_type,
                'time (s)': time_elapsed,
                'notes': 'trained on OCT images'
            }, 'Trained_Models/{}'.format(saving_name))

            # The CUDA memory report is only meaningful (and only safe to
            # request) when a GPU is present; skip it on the CPU fallback.
            if torch.cuda.is_available():
                print(torch.cuda.memory_summary())

            # Drop every reference to this run's objects so their memory can
            # be reclaimed before the next configuration starts.
            del model_ft, model, val_acc_history, train_acc_history, state_dicts, time_elapsed
            gc.collect()

            torch.cuda.empty_cache()
            print('MODEL:{} finished training'.format(saving_name))