diff --git a/htmresearch/frameworks/pytorch/continual_learning_experiment.py b/htmresearch/frameworks/pytorch/continual_learning_experiment.py
index 5186a4fac..324d20684 100644
--- a/htmresearch/frameworks/pytorch/continual_learning_experiment.py
+++ b/htmresearch/frameworks/pytorch/continual_learning_experiment.py
@@ -148,7 +148,7 @@ def initialize(self, params, repetition):
     self.k = params.get("k", self.n)
     self.k_inference_factor = params.get("k_inference_factor", 1.0)
-    self.boost_strength = params.get("boost_strength", 1.0)
+    self.boost_strength = params.get("boost_strength", 0.0)
     self.boost_strength_factor = params.get("boost_strength_factor", 1.0)
     self.weight_sparsity = params.get("weight_sparsity", 1.0)
     self.weight_sparsity_cnn = params.get("weight_sparsity_cnn", 1.0)
@@ -165,19 +165,21 @@ def createDenseCNNModel(self):
       nn.Conv2d(in_channels=self.in_channels,
                 out_channels=self.out_channels[0],
                 kernel_size=self.kernel_size[0],
                 stride=self.stride[0],
                 padding=self.padding[0]),
-      nn.MaxPool2d(kernel_size=2),
-      nn.ReLU(),
+      nn.BatchNorm2d(self.out_channels[0]),
+      nn.ReLU(inplace=True),
+      nn.MaxPool2d(2),
       nn.Conv2d(in_channels=self.out_channels[0],
                 out_channels=self.out_channels[1],
                 kernel_size=self.kernel_size[1],
                 stride=self.stride[1],
                 padding=self.padding[1]),
-      nn.MaxPool2d(kernel_size=2),
-      nn.ReLU(),
+      nn.BatchNorm2d(self.out_channels[1]),
+      nn.ReLU(inplace=True),
+      nn.MaxPool2d(2),
       Flatten(),
       nn.Linear(self.cnn_output_len[1], self.n),
-      nn.ReLU(),
+      nn.ReLU(inplace=True),
       nn.Linear(self.n, self.output_size),
       nn.LogSoftmax(dim=1)
@@ -199,22 +201,22 @@ def createSparseCNNModel(self):
       nn.Conv2d(in_channels=self.in_channels,
                 out_channels=self.out_channels[0],
                 kernel_size=self.kernel_size[0],
                 stride=self.stride[0],
                 padding=self.padding[0]),
-      nn.MaxPool2d(kernel_size=2),
       KWinners2d(n=self.cnn_output_len[0],
                  k=self.cnn_k[0],
                  channels=self.out_channels[0],
                  kInferenceFactor=self.k_inference_factor,
                  boostStrength=self.boost_strength,
                  boostStrengthFactor=self.boost_strength_factor),
+      nn.MaxPool2d(kernel_size=2),
       nn.Conv2d(in_channels=self.out_channels[0],
                 out_channels=self.out_channels[1],
                 kernel_size=self.kernel_size[1],
                 stride=self.stride[1],
                 padding=self.padding[1]),
-      nn.MaxPool2d(kernel_size=2),
       KWinners2d(n=self.cnn_output_len[1],
                  k=self.cnn_k[1],
                  channels=self.out_channels[1],
                  kInferenceFactor=self.k_inference_factor,
                  boostStrength=self.boost_strength,
                  boostStrengthFactor=self.boost_strength_factor),
+      nn.MaxPool2d(kernel_size=2),
       Flatten(),
@@ -256,7 +258,7 @@ def iterate(self, params, repetition, iteration):
     # Use 'iterations' to represent the task (0=[0-1], ..,5=[8-9])
     task = iteration
-
+    training_loss = []
 
     position = self.cfgparser.sections().index(self.name) * 2
     for epoch in tqdm.trange(self.epochs, position=position,
                              desc="{}:{}".format(self.name, task)):
@@ -272,11 +274,13 @@ def iterate(self, params, repetition, iteration):
                                                  batch_size=batch_size, shuffle=True)
 
       self.preEpoch()
-      trainModel(model=self.model, loader=train_loader,
-                 optimizer=self.optimizer, device=self.device,
-                 batches_in_epoch=batches_in_epoch,
-                 criterion=self.loss_function,
-                 progress_bar={"desc": "training", "position": position + 1})
+      loss = trainModel(model=self.model, loader=train_loader,
+                        optimizer=self.optimizer, device=self.device,
+                        batches_in_epoch=batches_in_epoch,
+                        criterion=self.loss_function,
+                        progress_bar={
+                            "desc": "training", "position": position + 1})
+      training_loss.append(loss)
       self.postEpoch()
 
       # Test on all trained tasks combined
@@ -284,10 +288,12 @@ def iterate(self, params, repetition, iteration):
       test_loader = torch.utils.data.DataLoader(dataset=combined_datasets,
                                                 batch_size=self.test_batch_size,
                                                 shuffle=True)
-      return evaluateModel(model=self.model, device=self.device,
-                           loader=test_loader,
-                           criterion=self.loss_function,
-                           progress={"desc": "testing", "position": position + 1})
+      res = {"training_loss": training_loss}
+      res.update(evaluateModel(model=self.model, device=self.device,
+                               loader=test_loader,
+                               criterion=self.loss_function,
+                               progress={"desc": "testing", "position": position + 1}))
+      return res
 
 
   def preEpoch(self):
diff --git a/htmresearch/frameworks/pytorch/dataset_utils.py b/htmresearch/frameworks/pytorch/dataset_utils.py
index 6c29e85a9..679fca5b0 100644
--- a/htmresearch/frameworks/pytorch/dataset_utils.py
+++ b/htmresearch/frameworks/pytorch/dataset_utils.py
@@ -112,7 +112,7 @@ def splitDataset(dataset, groupby):
   # Split dataset based on the group by function and keep track of indices
   indicesByGroup = collections.defaultdict(list)
   for k, g in itertools.groupby(enumerate(dataset), key=lambda x: groupby(x[1])):
-    indicesByGroup[k].extend([i[0] for i in g])
+    indicesByGroup[k.item()].extend([i[0] for i in g])
 
   # Sort by group and create a Subset dataset for each of the group indices
   _, indices = zip(*(sorted(indicesByGroup.items(), key=lambda x: x[0])))
diff --git a/htmresearch/frameworks/pytorch/model_utils.py b/htmresearch/frameworks/pytorch/model_utils.py
index e671f575d..fefca179a 100644
--- a/htmresearch/frameworks/pytorch/model_utils.py
+++ b/htmresearch/frameworks/pytorch/model_utils.py
@@ -24,6 +24,7 @@
 import logging
 import sys
 
+import numpy as np
 import torch
 import torch.nn.functional as F
 from tqdm import tqdm
@@ -58,6 +59,8 @@ def trainModel(model, loader, optimizer, device, criterion=F.nll_loss,
   :param progress_bar: Optional :class:`tqdm` progress bar args.
                        None for no progress bar
   :type progress_bar: dict or None
+
+  :returns mean loss value
   """
   model.train()
   if progress_bar is not None:
@@ -66,11 +69,13 @@ def trainModel(model, loader, optimizer, device, criterion=F.nll_loss,
     if batches_in_epoch < len(loader):
       loader.total = batches_in_epoch
 
+  losses = []
   for batch_idx, (data, target) in enumerate(loader):
     data, target = data.to(device), target.to(device)
     optimizer.zero_grad()
     output = model(data)
     loss = criterion(output, target)
+    losses.append(loss.item())
     loss.backward()
     optimizer.step()
@@ -83,6 +88,8 @@ def trainModel(model, loader, optimizer, device, criterion=F.nll_loss,
       loader.n = loader.total
     loader.close()
 
+  return np.mean(losses)
+
 
 def evaluateModel(model, loader, device, criterion=F.nll_loss, progress=None):
diff --git a/htmresearch/frameworks/pytorch/sparse_speech_experiment.py b/htmresearch/frameworks/pytorch/sparse_speech_experiment.py
index fc8965153..35ad96a0f 100644
--- a/htmresearch/frameworks/pytorch/sparse_speech_experiment.py
+++ b/htmresearch/frameworks/pytorch/sparse_speech_experiment.py
@@ -20,6 +20,9 @@
 # ----------------------------------------------------------------------
 
 from __future__ import print_function
+
+import copy
+import json
 import os
 import time
@@ -148,6 +151,9 @@ def reset(self, params, repetition):
     self.optimizer = self.createOptimizer(params, self.model)
     self.lr_scheduler = self.createLearningRateScheduler(params,
                                                          self.optimizer)
+    self.best_score = 0.0
+    self.best_model = None
+    self.best_epoch = -1
 
   def iterate(self, params, repetition, iteration):
     """
@@ -200,6 +206,13 @@ def iterate(self, params, repetition, iteration):
             "loss=", testResults["test_loss"])
       ret.update({"testerror": testResults["testerror"]})
 
+      score = testResults["testerror"]
+      if score > self.best_score:
+        self.best_epoch = iteration
+        self.best_score = score
+        self.best_model = copy.deepcopy(self.model)
+
+
       # Run bg noise set
       if self.bg_noise_loader is not None:
         bgResults = self.test(params, self.bg_noise_loader)
@@ -230,12 +243,34 @@ def iterate(self, params, repetition, iteration):
 
   def finalize(self, params, rep):
     """
-    Save the full model once we are done.
+    Save best model once we are done.
     """
     if params.get("saveNet", True):
       saveDir = os.path.join(params["path"], params["name"],
-                             "model_{}.pt".format(rep))
-      torch.save(self.model, saveDir)
+                             "best_model_{}.pt".format(rep))
+      torch.save(self.best_model, saveDir)
+
+    # Run noise test on best model at the end
+    if params.get("run_noise_tests_best_model", False):
+      self.model = self.best_model
+      results = self.runNoiseTests(params)
+
+      # Update best epoch log with noise results
+      fullpath = os.path.join(params['path'], params['name'])
+      logfile = os.path.join(fullpath, '%i.log' % rep)
+      log = []
+
+      with open(logfile, 'r') as f:
+        for line in f:
+          log.append(json.loads(line))
+
+      with open(logfile, 'w') as f:
+        log[self.best_epoch].update(results)
+        for entry in log:
+          json.dump(entry, f)
+          f.write('\n')
+          f.flush()
+
 
   def createLearningRateScheduler(self, params, optimizer):