new_users.py
from __future__ import print_function, division

import os
import sys

import numpy as np
import dynet as dy

import data
import evaluation
import helpers
import options
import utils


def load_user_filepairs(file_list):
    """Read a manifest of whitespace-separated `src_file trg_file` pairs, one per line."""
    src_files, trg_files = [], []
    with open(file_list, 'r') as f:
        for l in f:
            src_file, trg_file = l.strip().split()
            src_files.append(src_file)
            trg_files.append(trg_file)
    return zip(src_files, trg_files)
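
# The manifest is expected to hold one user per line as two whitespace-separated
# paths (the file names below are made up, purely for illustration):
#
#   user01.src.txt  user01.trg.txt
#   user02.src.txt  user02.trg.txt
#
# Note: under Python 3, zip() returns a one-shot iterator; wrap the result in
# list() if the pairs ever need to be traversed more than once.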

def split_user_data(src, trg, n_test=2):
    """Shuffle a user's data and hold out the last n_test pairs for testing."""
    ids = np.arange(len(src), dtype=int)
    np.random.shuffle(ids)
    src = np.asarray(src)
    trg = np.asarray(trg)
    return src[ids[:-n_test]], src[ids[-n_test:]], trg[ids[:-n_test]], trg[ids[-n_test:]], ids
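
# Illustration, assuming six sentence pairs and the default n_test=2:
#
#   train_src, test_src, train_trg, test_trg, order = split_user_data(src, trg)
#   # -> 4 shuffled training pairs and 2 held-out test pairs; `order` is the
#   #    permutation that was applied, so the split can be reconstructed.
#
# Seeding numpy beforehand (np.random.seed) makes the shuffle, and hence the
# train/test split, reproducible across runs.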

def evaluate_model(s2s, test_src, beam_size):
    """Translate the test split with the model in its current state."""
    s2s.set_test_mode()
    translations = []
    for x in test_src:
        y_hat = s2s.translate(x, 0, beam_size=beam_size)
        translations.append(y_hat)
    return translations
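
# Hedged usage sketch -- beam width 5 is an arbitrary stand-in for the value
# that actually comes from opt.beam_size elsewhere in this script:
#
#   hyps = evaluate_model(s2s, test_src, beam_size=5)
#
# The hard-coded 0 passed to s2s.translate appears to be a user index: the
# script adapts one user vector at a time, so slot 0 is always the active one.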

def adapt_user(s2s, trainer, train_src, train_trg, test_src, opt):
    """Fine-tune the model on one user's training pairs, translating after each improvement."""
    timer = utils.Timer()
    log = utils.Logger(opt.verbose)
    # Token count, discounting the one special symbol per target sentence
    n_tokens = sum(map(len, train_trg)) - len(train_trg)
    timer.restart()
    best_ppl = np.inf
    translations = []  # guard against opt.num_epochs == 0
    for epoch in range(opt.num_epochs):
        timer.tick()
        dy.renew_cg()
        # Accumulate the loss over all samples (one full-batch update per epoch)
        losses = []
        for x, y in zip(train_src, train_trg):
            losses.append(s2s.calculate_user_loss([x], [y], [0], update_mode=opt.update_mode))
        loss = dy.average(losses)
        # Backward pass + parameter update
        loss.backward()
        trainer.update()
        # Report training loss and perplexity
        train_loss = loss.value() / n_tokens
        train_ppl = np.exp(train_loss)
        trainer.status()
        elapsed = timer.tick()
        log.info(" Training_loss=%f, ppl=%f, time=%f s, tok/s=%.1f" %
                 (train_loss, train_ppl, elapsed, n_tokens / elapsed))
        if train_ppl < best_ppl:
            # Keep the translations from the best epoch so far
            best_ppl = train_ppl
            translations = evaluate_model(s2s, test_src, opt.beam_size)
        else:
            log.info("Early stopping after %d iterations" % (epoch + 1))
            break
    return translations
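
# Design note: each epoch builds a single computation graph over the whole
# adaptation set and takes one full-batch gradient step, which is cheap here
# because per-user data is capped at opt.max_n_train sentences. Training stops
# at the first epoch where training perplexity fails to improve, and the
# translations from the best epoch seen so far are returned. Minimal sketch
# (trainer construction as in eval_user_adaptation below):
#
#   trainer = helpers.get_trainer(opt, s2s)
#   hyps = adapt_user(s2s, trainer, train_src, train_trg, test_src, opt)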

def optimized_adapt(s2s, trainer, train_src, train_trg, test_src, opt):
    """Bias-only variant of adapt_user that reuses precomputed per-sentence scores."""
    timer = utils.Timer()
    log = utils.Logger(opt.verbose)
    n_tokens = sum(map(len, train_trg)) - len(train_trg)
    # Precompute scores once, since only the biases are trained
    train_scores = []
    for x, y in zip(train_src, train_trg):
        sent_scores = s2s.precompute_scores([x], [y], [0])
        train_scores.append([score.npvalue() for score in sent_scores])
    # Train
    timer.restart()
    best_ppl = np.inf
    translations = []
    for epoch in range(opt.num_epochs):
        timer.tick()
        dy.renew_cg()
        # Accumulate the loss over all samples
        losses = []
        for scores, y in zip(train_scores, train_trg):
            losses.append(s2s.calculate_user_bias_loss([scores], [y], [0], update_mode=opt.update_mode))
        loss = dy.average(losses)
        # Backward pass + parameter update
        loss.backward()
        trainer.update()
        train_loss = loss.value() / n_tokens
        train_ppl = np.exp(train_loss)
        elapsed = timer.tick()
        if (epoch + 1) % opt.check_train_error_every == 0:
            trainer.status()
            log.info(" Training_loss=%f, ppl=%f, time=%f s, tok/s=%.1f" %
                     (train_loss, train_ppl, elapsed, n_tokens / elapsed))
        # Require a minimum perplexity improvement of 1e-3 to keep training
        if train_ppl < best_ppl and best_ppl - train_ppl >= 1e-3:
            best_ppl = train_ppl
            translations = evaluate_model(s2s, test_src, opt.beam_size)
        else:
            log.info("Early stopping after %d iterations" % (epoch + 1))
            break
    return translations
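
# Design note: when only the output biases are updated, the encoder/decoder
# states never change between epochs, so the pre-softmax scores can be
# computed once per sentence and replayed at every epoch; each epoch then only
# pays for the bias part of the graph. Hedged usage sketch -- the signature
# mirroring adapt_user is an assumption made when repairing this function,
# which originally relied on undefined free variables and returned nothing:
#
#   hyps = optimized_adapt(s2s, trainer, train_src, train_trg, test_src, opt)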

def eval_user_adaptation(opt):
    log = utils.Logger(opt.verbose)
    timer = utils.Timer()
    # Read vocabularies
    lexicon = helpers.get_lexicon(opt)
    # Read the list of per-user file pairs
    filepairs = load_user_filepairs(opt.usr_file_list)
    # No external target language model is used here
    lang_model = None
    # Load model
    s2s = helpers.build_model(opt, lexicon, lang_model, test=True)
    if opt.update_mode == 'mixture_weights' and opt.user_recognizer != 'fact_voc':
        log.info('Updating only the mixture weights doesn\'t make sense here')
        sys.exit(1)
    s2s.lm = lexicon.trg_unigrams
    # s2s.freeze_parameters()
    # Trainer
    trainer = helpers.get_trainer(opt, s2s)
    # Print configuration
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(lexicon.w2ids),
                             trg_dict_size=len(lexicon.w2idt))
    # These will store translations and gold sentences across all users
    base_translations = []
    adapt_translations = []
    gold = []
    # Run adaptation for each user
    for usr_id, (src_file, trg_file) in enumerate(filepairs):
        log.info('Evaluating on files %s' % os.path.basename(src_file).split()[0])
        # Load the file pair
        src_data = data.read_corpus(src_file, lexicon.w2ids, raw=True)
        trg_data = data.read_corpus(trg_file, lexicon.w2idt, raw=True)
        # Split into train/test
        train_src, test_src, train_trg, test_trg, order = split_user_data(
            src_data, trg_data, n_test=opt.n_test)
        # Convert training data to indices
        train_src = lexicon.sents_to_ids(train_src)
        train_trg = lexicon.sents_to_ids(train_trg, trg=True)
        # Save the gold test sentences
        for s in test_trg:
            gold.append(' '.join(s))
        # Reset the model to its pre-adaptation state
        s2s.load()
        s2s.reset_usr_vec()
        # Translate with the baseline model
        base_translations.extend(evaluate_model(s2s, test_src, opt.beam_size))
        # Adapt to this user, then translate again
        n_train = opt.max_n_train
        adapt_translations.extend(adapt_user(s2s, trainer, train_src[:n_train],
                                             train_trg[:n_train], test_src, opt))
    # Write temporary files for scoring
    temp_gold = utils.exp_temp_filename(opt, 'gold.txt')
    temp_base = utils.exp_temp_filename(opt, '%s_base.txt' % opt.update_mode)
    temp_adapt = utils.exp_temp_filename(opt, '%s_adapt.txt' % opt.update_mode)
    utils.savetxt(temp_gold, gold)
    utils.savetxt(temp_base, base_translations)
    utils.savetxt(temp_adapt, adapt_translations)
    # Evaluate baseline translations
    bleu, details = evaluation.bleu_score(temp_gold, temp_base)
    log.info('Base BLEU score: %.2f' % bleu)
    # Evaluate adapted translations
    bleu, details = evaluation.bleu_score(temp_gold, temp_adapt)
    log.info('Adaptation BLEU score: %.2f' % bleu)
    # Compare the two with paired bootstrap resampling
    temp_bootstrap_gold = utils.exp_temp_filename(opt, 'bootstrap_gold.txt')
    temp_bootstrap_base = utils.exp_temp_filename(opt, 'bootstrap_base.txt')
    temp_bootstrap_adapt = utils.exp_temp_filename(opt, 'bootstrap_adapt.txt')
    bleus = evaluation.paired_bootstrap_resampling(temp_gold, temp_base, temp_adapt,
                                                   opt.bootstrap_num_samples,
                                                   opt.bootstrap_sample_size,
                                                   temp_bootstrap_gold,
                                                   temp_bootstrap_base,
                                                   temp_bootstrap_adapt)
    evaluation.print_paired_stats(bleus)
    os.remove(temp_bootstrap_gold)
    os.remove(temp_bootstrap_base)
    os.remove(temp_bootstrap_adapt)


if __name__ == "__main__":
    opt = options.get_options()
    eval_user_adaptation(opt)
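
# Hypothetical invocation -- the exact flag names are defined in options.py
# and are assumptions here:
#
#   python new_users.py --usr_file_list users.txt --n_test 2 \
#       --max_n_train 16 --num_epochs 10 --beam_size 5 --verbose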