-
Notifications
You must be signed in to change notification settings - Fork 36
Wikitext - [WIP] #150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development
Are you sure you want to change the base?
Wikitext - [WIP] #150
Changes from 8 commits
2fe5e58
83957a7
918b0f9
9c5090b
f4d4413
a34d681
9cb6a33
68cd917
812bdd0
5f67bb2
49663ce
5d3d75e
ac9547a
3f08eb2
4c4f1d9
9e471d9
1dee0c3
c862cd6
2a5d35d
29c4377
ab5f484
6e6af73
9767d5c
f28e783
8ad4eaf
4412b70
c44bdfc
130282e
e7eb353
f259291
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"lm": [ | ||
"torch==1.3.0", | ||
"tqdm" | ||
] | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,368 @@ | ||||||||||
""" | ||||||||||
Changelog: | ||||||||||
========== | ||||||||||
|
||||||||||
0.0.1: | ||||||||||
* First implementation of the Multi-Objective Language Model Benchmark. | ||||||||||
""" | ||||||||||
from typing import Union, Dict, List | ||||||||||
import ConfigSpace as CS | ||||||||||
import numpy as np | ||||||||||
import torch | ||||||||||
import torch.nn as nn | ||||||||||
import logging | ||||||||||
import hpobench.util.rng_helper as rng_helper | ||||||||||
from hpobench.abstract_benchmark import AbstractMultiObjectiveBenchmark | ||||||||||
from hpobench.util.data_manager import LanguageModelDataManager | ||||||||||
from hpobench.dependencies.lm.tokenize_util import batchify | ||||||||||
from hpobench.dependencies.lm.model import TransformerModel | ||||||||||
import time | ||||||||||
import math | ||||||||||
import tqdm | ||||||||||
import random | ||||||||||
|
||||||||||
__version__ = '0.0.1' | ||||||||||
|
||||||||||
logger = logging.getLogger('LM_Bench') | ||||||||||
|
||||||||||
|
||||||||||
class LanguageModelBenchmark(AbstractMultiObjectiveBenchmark): | ||||||||||
|
||||||||||
def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): | ||||||||||
super(LanguageModelBenchmark, self).__init__(rng=rng) | ||||||||||
PhMueller marked this conversation as resolved.
Show resolved
Hide resolved
PhMueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
|
||||||||||
self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') | ||||||||||
data_manager = LanguageModelDataManager(self.device) | ||||||||||
self.X_train, self.X_valid, self.X_test = data_manager.load() | ||||||||||
self.ntokens = len(data_manager.corpus.dictionary) | ||||||||||
self.__seed_everything() | ||||||||||
self.variable = {"eval_batch_size": 10, | ||||||||||
"nlayers": 2, | ||||||||||
"bptt": 35, | ||||||||||
"tied": True, | ||||||||||
# number of attention head | ||||||||||
"nhead": 2, | ||||||||||
"ntoken": self.ntokens | ||||||||||
} | ||||||||||
print("len of corpus dict", self.ntokens) | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
|
||||||||||
def __seed_everything(self): | ||||||||||
"""Helperfunction: Make the benchmark deterministic by setting the correct seeds""" | ||||||||||
seed = self.rng.randint(0, 100000) | ||||||||||
print("seed obtained", seed) | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
random.seed(seed) | ||||||||||
np.random.seed(seed) | ||||||||||
torch.manual_seed(seed) | ||||||||||
torch.cuda.manual_seed_all(seed) | ||||||||||
torch.backends.cudnn.deterministic = True | ||||||||||
|
||||||||||
@staticmethod | ||||||||||
def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: | ||||||||||
"""Parameter space to be optimized --- contains the hyperparameters | ||||||||||
""" | ||||||||||
cs = CS.ConfigurationSpace(seed=seed) | ||||||||||
|
||||||||||
cs.add_hyperparameters([ | ||||||||||
CS.UniformIntegerHyperparameter( | ||||||||||
'batch_size', default_value=128, lower=8, upper=256 | ||||||||||
), | ||||||||||
CS.UniformIntegerHyperparameter( | ||||||||||
'emsize', default_value=128, lower=32, upper=1024, log=True | ||||||||||
), | ||||||||||
CS.UniformIntegerHyperparameter( | ||||||||||
'lr_factor', default_value=50, lower=1, upper=100, log=True | ||||||||||
), | ||||||||||
CS.UniformFloatHyperparameter( | ||||||||||
'lr', default_value=5, lower=1, upper=50, log=True | ||||||||||
), | ||||||||||
CS.UniformFloatHyperparameter( | ||||||||||
'dropout', default_value=0.99, lower=0, upper=0.99 | ||||||||||
), | ||||||||||
CS.UniformFloatHyperparameter( | ||||||||||
'clip', default_value=0.99, lower=0.1, upper=2 | ||||||||||
) | ||||||||||
|
||||||||||
]) | ||||||||||
return cs | ||||||||||
|
||||||||||
@staticmethod | ||||||||||
def get_objective_names(self) -> List[str]: | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
return ['log_perplexity', 'accuracy', 'time'] | ||||||||||
|
||||||||||
@staticmethod | ||||||||||
def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: | ||||||||||
|
||||||||||
PhMueller marked this conversation as resolved.
Show resolved
Hide resolved
PhMueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
fidelity_space = CS.ConfigurationSpace(seed=seed) | ||||||||||
fidelity_space.add_hyperparameters([ | ||||||||||
CS.UniformIntegerHyperparameter( | ||||||||||
'budget', lower=1, upper=81, default_value=81, log=False | ||||||||||
) | ||||||||||
]) | ||||||||||
return fidelity_space | ||||||||||
|
||||||||||
@staticmethod | ||||||||||
def get_meta_information() -> Dict: | ||||||||||
""" Returns the meta information for the benchmark """ | ||||||||||
return { | ||||||||||
'name': 'Multi-objective Asynchronous Successive Halving', | ||||||||||
'references': ['@article{schmucker2021multi,' | ||||||||||
'title={Multi-objective Asynchronous Successive Halving},' | ||||||||||
'author={Schmucker, Robin and Donini, Michele and Zafar, Muhammad Bilal and Salinas,' | ||||||||||
' David and Archambeau, C{\'e}dric},' | ||||||||||
'journal={arXiv preprint arXiv:2106.12639},' | ||||||||||
'year={2021}', | ||||||||||
], | ||||||||||
} | ||||||||||
|
||||||||||
def init_model(self, config: Union[CS.Configuration, Dict]): | ||||||||||
""" Function that returns the model initialized based on the configuration and fidelity | ||||||||||
""" | ||||||||||
|
||||||||||
if isinstance(config, CS.Configuration): | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually we don't need that check ( The objective_function -wrapper casts configurations always to dicts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually we don't need that check ( The objective_function -wrapper casts configurations always to dicts. |
||||||||||
config = config.get_dictionary() | ||||||||||
model = TransformerModel( | ||||||||||
self.variable['ntoken'], config['emsize'], self.variable['nhead'], config['emsize'], | ||||||||||
self.variable['nlayers'], config['dropout']) | ||||||||||
|
||||||||||
return model | ||||||||||
|
||||||||||
@AbstractMultiObjectiveBenchmark.check_parameters | ||||||||||
def objective_function(self, configuration: Union[CS.Configuration, Dict], | ||||||||||
fidelity: Union[Dict, CS.Configuration, None] = None, | ||||||||||
rng: Union[np.random.RandomState, int, None] = None, | ||||||||||
shuffle: bool = False, | ||||||||||
**kwargs) -> Dict: | ||||||||||
""" | ||||||||||
|
||||||||||
Parameters | ||||||||||
---------- | ||||||||||
configuration | ||||||||||
fidelity: Dict, None | ||||||||||
epoch: int - Values: [1, 81] | ||||||||||
Number of epochs an architecture was trained. | ||||||||||
Note: the number of epoch is 1 indexed! (Results after the first epoch: epoch = 1) | ||||||||||
|
||||||||||
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. | ||||||||||
rng : np.random.RandomState, int, None | ||||||||||
Random seed to use in the benchmark. | ||||||||||
|
||||||||||
To prevent overfitting on a single seed, it is possible to pass a | ||||||||||
parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. | ||||||||||
If this parameter is not given, the default random state is used. | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||||||||||
|
||||||||||
kwargs | ||||||||||
|
||||||||||
Returns | ||||||||||
------- | ||||||||||
Dict - | ||||||||||
function_value : Dict | ||||||||||
validation_accuracy: float | ||||||||||
log_perplexity: float | ||||||||||
cost : time to train the network | ||||||||||
info : Dict | ||||||||||
validation_accuracy : float, | ||||||||||
test_accuracy : float, | ||||||||||
log_perplexity : float, | ||||||||||
negative_log_perplexity : float, | ||||||||||
training_cost : float, | ||||||||||
valid_cost : float, | ||||||||||
test_cost : float, | ||||||||||
fidelity : Dict | ||||||||||
used fidelities in this evaluation | ||||||||||
""" | ||||||||||
|
||||||||||
self.rng = rng_helper.get_rng(rng) | ||||||||||
self.__seed_everything() | ||||||||||
|
||||||||||
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') | ||||||||||
ts_start = time.time() | ||||||||||
|
||||||||||
# batchify data | ||||||||||
batch_size = configuration['batch_size'] | ||||||||||
train_data = batchify(self.X_train, batch_size=batch_size).to(device) | ||||||||||
val_data = batchify(self.X_valid, batch_size=self.variable["eval_batch_size"]).to(device) | ||||||||||
test_data = batchify(self.X_test, batch_size=self.variable["eval_batch_size"]).to(device) | ||||||||||
|
||||||||||
epochs = fidelity['budget'] | ||||||||||
|
||||||||||
model = self.init_model(configuration).to(device) | ||||||||||
|
||||||||||
criterion = nn.CrossEntropyLoss() | ||||||||||
|
||||||||||
learning_rate = configuration['lr'] | ||||||||||
learning_rate_factor = configuration['lr_factor'] | ||||||||||
clip = configuration['clip'] | ||||||||||
best_val_loss = None | ||||||||||
train_time = 0 | ||||||||||
eval_time = 0 | ||||||||||
|
||||||||||
t = tqdm.tqdm(total=epochs) | ||||||||||
for epoch in range(epochs): | ||||||||||
epoch_start_time = time.time() | ||||||||||
train_loss, train_acc = model.train_fun(self.ntokens, criterion, train_data, learning_rate, clip) | ||||||||||
train_time += time.time() - epoch_start_time | ||||||||||
start = time.time() | ||||||||||
val_loss, val_acc = model.eval_fun(self.ntokens, criterion, val_data) | ||||||||||
val_loss = np.clip(val_loss, 1e-10, 10) | ||||||||||
print("val acc for last epoch", val_acc) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||
eval_time += start - time.time() | ||||||||||
|
||||||||||
t.set_postfix(val_accuracy=val_acc) | ||||||||||
t.update() | ||||||||||
|
||||||||||
if not np.isfinite(val_loss): | ||||||||||
val_loss = 7 | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
|
||||||||||
# Save the model if the validation loss is the best we've seen so far. | ||||||||||
if not best_val_loss or val_loss < best_val_loss: | ||||||||||
best_val_loss = val_loss | ||||||||||
else: | ||||||||||
# Anneal the learning rate if no improvement has been seen in the validation dataset. | ||||||||||
learning_rate /= learning_rate_factor | ||||||||||
|
||||||||||
start_time = time.time() | ||||||||||
_, test_acc = model.eval_fun(self.ntokens, criterion, test_data) | ||||||||||
eval_test_runtime = time.time() - start_time | ||||||||||
|
||||||||||
perplexity = math.exp(best_val_loss) | ||||||||||
log_perplexity = best_val_loss | ||||||||||
neg_log_perplexity = 10 - best_val_loss | ||||||||||
elapsed_time = float(ts_start - time.time()) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. elapsed_time is already float There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. elapsed_time is already float |
||||||||||
|
||||||||||
return {'function_value': {'log_perplexity': log_perplexity, | ||||||||||
'accuracy': val_acc.item(), | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
'time': train_time + eval_time | ||||||||||
}, | ||||||||||
'cost': elapsed_time, | ||||||||||
'info': {'train_accuracy': train_acc.item(), | ||||||||||
'validation_accuracy': val_acc.item(), | ||||||||||
'test_accuracy': test_acc.item(), | ||||||||||
'log_perplexity': log_perplexity, | ||||||||||
'perplexity': perplexity, | ||||||||||
'negative_log_perplexity': neg_log_perplexity, | ||||||||||
'training_cost': train_time, | ||||||||||
'valid_cost': eval_time, | ||||||||||
'test_cost': eval_test_runtime, | ||||||||||
'fidelity': fidelity | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
@AbstractMultiObjectiveBenchmark.check_parameters | ||||||||||
def objective_function_test(self, configuration: Union[CS.Configuration, Dict], | ||||||||||
fidelity: Union[Dict, None] = None, | ||||||||||
rng: Union[np.random.RandomState, int, None] = None, | ||||||||||
shuffle: bool = False, | ||||||||||
**kwargs) -> Dict: | ||||||||||
""" | ||||||||||
Get the validated results. Runs a given configuration on the largest budget (here: 50). | ||||||||||
Parameters | ||||||||||
---------- | ||||||||||
configuration | ||||||||||
fidelity: Dict, None | ||||||||||
epoch: int - Values: [1, 81] | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We only allow 81. Maybe this is something we should discuss: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We only allow 81. Maybe this is something we should discuss: |
||||||||||
Number of epochs an architecture was trained. | ||||||||||
Note: the number of epoch is 1 indexed. (Results after the first epoch: epoch = 1) | ||||||||||
|
||||||||||
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. | ||||||||||
rng : np.random.RandomState, int, None | ||||||||||
Random seed to use in the benchmark. | ||||||||||
|
||||||||||
To prevent overfitting on a single seed, it is possible to pass a | ||||||||||
parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. | ||||||||||
If this parameter is not given, the default random state is used. | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||
kwargs | ||||||||||
Returns | ||||||||||
------- | ||||||||||
Dict - | ||||||||||
function_value : Dict | ||||||||||
validation_accuracy: float | ||||||||||
log_perplexity: float | ||||||||||
cost : time to train the network | ||||||||||
info : Dict | ||||||||||
validation_accuracy : float, | ||||||||||
test_accuracy : float, | ||||||||||
log_perplexity : float, | ||||||||||
negative_log_perplexity : float, | ||||||||||
training_cost : float, | ||||||||||
valid_cost : float, | ||||||||||
test_cost : float, | ||||||||||
fidelity : Dict | ||||||||||
used fidelities in this evaluation | ||||||||||
""" | ||||||||||
|
||||||||||
# The result dict should contain already all necessary information -> Just swap the function value from valid | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. outdated comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. outdated comment. |
||||||||||
# to test and the corresponding time cost | ||||||||||
assert fidelity['epoch'] == 81, 'Only test data for the 50. epoch is available. ' | ||||||||||
ts_start = time.time() | ||||||||||
|
||||||||||
self.rng = rng_helper.get_rng(rng) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
self.__seed_everything() | ||||||||||
|
||||||||||
# batchify data | ||||||||||
batch_size = configuration['batch_size'] | ||||||||||
train_data = batchify(self.X_train, batch_size=batch_size) | ||||||||||
val_data = batchify(self.X_valid, batch_size=batch_size) | ||||||||||
train_data = np.vstack((train_data, val_data)) | ||||||||||
train_data = torch.tensor(train_data).to(self.device) | ||||||||||
test_data = batchify(self.X_test, batch_size=self.variable["eval_batch_size"]).to(self.device) | ||||||||||
|
||||||||||
epochs = fidelity['budget'] | ||||||||||
|
||||||||||
model = self.init_model(configuration).to(self.device) | ||||||||||
|
||||||||||
criterion = nn.CrossEntropyLoss() | ||||||||||
|
||||||||||
learning_rate = configuration['lr'] | ||||||||||
learning_rate_factor = configuration['lr_factor'] | ||||||||||
clip = configuration['clip'] | ||||||||||
best_test_loss = None | ||||||||||
train_time = 0 | ||||||||||
eval_time = 0 | ||||||||||
t = tqdm.tqdm(total=epochs) | ||||||||||
for epoch in range(1, epochs + 1): | ||||||||||
epoch_start_time = time.time() | ||||||||||
train_loss, train_acc = model.train_fun(self.ntokens, criterion, train_data, learning_rate, | ||||||||||
clip) | ||||||||||
train_time += time.time() - epoch_start_time | ||||||||||
start = time.time() | ||||||||||
|
||||||||||
test_loss, test_acc = model.eval_fun(self.ntokens, criterion, test_data) | ||||||||||
test_loss = np.clip(test_loss, 1e-10, 10) | ||||||||||
eval_time += time.time() - start | ||||||||||
|
||||||||||
t.set_postfix(test_accuracy=test_acc) | ||||||||||
t.update() | ||||||||||
if not np.isfinite(test_loss): | ||||||||||
test_loss = 7 | ||||||||||
|
||||||||||
# Save the model if the validation loss is the best we've seen so far. | ||||||||||
if not best_test_loss or test_loss < best_test_loss: | ||||||||||
best_test_loss = test_loss | ||||||||||
else: | ||||||||||
# Anneal the learning rate if no improvement has been seen in the validation dataset. | ||||||||||
learning_rate /= learning_rate_factor | ||||||||||
|
||||||||||
perplexity = math.exp(best_test_loss) | ||||||||||
log_perplexity = best_test_loss | ||||||||||
neg_log_perplexity = 10 - best_test_loss | ||||||||||
elapsed_time = float(ts_start - time.time()) | ||||||||||
|
||||||||||
return {'function_value': {'log_perplexity': log_perplexity, | ||||||||||
'accuracy': test_acc.item(), | ||||||||||
ayushi-3536 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
'time': train_time + eval_time | ||||||||||
}, | ||||||||||
'cost': elapsed_time, | ||||||||||
'info': {'train_accuracy': train_acc.item(), | ||||||||||
'test_accuracy': test_acc.item(), | ||||||||||
'log_perplexity': log_perplexity, | ||||||||||
'perplexity': perplexity, | ||||||||||
'negative_log_perplexity': neg_log_perplexity, | ||||||||||
'training_cost': train_time, | ||||||||||
'test_cost': eval_time, | ||||||||||
'fidelity': fidelity | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
# Public API of this module.
__all__ = ["LanguageModelBenchmark"]
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a new line at the end of file. -> checkstyle error There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a new line at the end of file. -> checkstyle error |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
""" Benchmark for the Multi-Objective Language Model Benchmark from hpobench/benchmarks/mo/lm_benchmark.py | ||
""" | ||
|
||
from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient | ||
|
||
|
||
class LanguageModelBenchmark(AbstractBenchmarkClient):
    """Container client for the Multi-Objective Language Model Benchmark
    (``hpobench/benchmarks/mo/lm_benchmark.py``)."""

    def __init__(self, **kwargs):
        # Fill in the container defaults only when the caller did not override them.
        kwargs.setdefault('benchmark_name', 'LanguageModelBenchmark')
        kwargs.setdefault('container_name', 'lm_benchmark')
        kwargs['latest'] = kwargs.get('container_tag', '0.0.1')
        super(LanguageModelBenchmark, self).__init__(**kwargs)
Uh oh!
There was an error while loading. Please reload this page.