main.py
import numpy as np
import torch
USE_GPU = True
BATCH_SIZE = 32
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

import torchtext
from torchtext import data
import spacy
from spacy.symbols import ORTH

# the spaCy model name depends on the installed version; older releases use spacy.load('en')
my_tok = spacy.load("en_core_web_sm")

def spacy_tok(x):
    # tokenize with spaCy; the Field below lowercases, so raw token text is enough here
    return [tok.text for tok in my_tok.tokenizer(x)]

TEXT = data.Field(lower=True, tokenize=spacy_tok)

from torchtext.datasets import WikiText2

train, valid, test = WikiText2.splits(TEXT)  # loading custom datasets requires passing in the field, but nothing else
TEXT.build_vocab(train, vectors="glove.6B.200d")
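# Optional sanity check (illustrative, not part of the original script): the vocab built
# above should hold one pretrained GloVe row per token, with embedding dimension 200.
print("vocab size:", len(TEXT.vocab))
print("embedding matrix shape:", TEXT.vocab.vectors.shape)  # (vocab size, 200)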
train_iter, valid_iter, test_iter = data.BPTTIterator.splits(
    (train, valid, test),
    batch_size=BATCH_SIZE,
    bptt_len=30,  # this is where we specify the sequence length
    device=device if USE_GPU else torch.device("cpu"),  # older torchtext versions expect a GPU index (0) or -1 here
    repeat=False)
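# Optional sanity check (illustrative): a BPTTIterator batch carries `text` of shape
# (bptt_len, batch_size) and `target`, the same sequence shifted forward by one token.
sample = next(iter(train_iter))
print("text:", sample.text.shape, "target:", sample.target.shape)  # both (30, 32) here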
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable as V  # Variable is a no-op wrapper in modern PyTorch
class RNNModel(nn.Module):
    def __init__(self, ntoken, ninp,
                 nhid, nlayers, bsz,
                 dropout=0.5, tie_weights=True):
        # note: the dropout and tie_weights arguments are accepted but not applied below
        # (the dropout calls are commented out and the decoder keeps its own weights)
        super(RNNModel, self).__init__()
        self.nhid, self.nlayers, self.bsz = nhid, nlayers, bsz
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        #self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        self.rnn = nn.LSTM(ninp, nhid, nlayers)
        self.decoder = nn.Linear(nhid, ntoken)
        self.init_weights()
        # the input is a batched consecutive corpus, therefore we retain the
        # hidden state across batches
        self.hidden = self.init_hidden(bsz)

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input):
        #emb = self.drop(self.encoder(input)).to(device)
        emb = self.encoder(input).to(device)
        output, self.hidden = self.rnn(emb, self.hidden)
        #output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1))

    def init_hidden(self, bsz):
        # create the hidden state on the same device as the rest of the pipeline,
        # so this also runs on CPU-only machines (the original hard-coded .cuda())
        weight = next(self.parameters()).data
        return (weight.new_zeros(self.nlayers, bsz, self.nhid).to(device),
                weight.new_zeros(self.nlayers, bsz, self.nhid).to(device))

    def reset_history(self):
        """Wraps hidden states in new Variables, to detach them from their history."""
        self.hidden = tuple(V(v.data) for v in self.hidden)
weight_matrix = TEXT.vocab.vectors

model = RNNModel(weight_matrix.size(0),
                 weight_matrix.size(1), 200, 1, BATCH_SIZE)
model.encoder.weight.data.copy_(weight_matrix)
model.to(device)  # device is "cuda:0" when available, otherwise "cpu"

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.7, 0.99))

n_epochs = 2
n_tokens = weight_matrix.size(0)
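# Optional (illustrative, not in the original): the tie_weights flag passed to RNNModel
# is never applied inside the class. Because nhid and the GloVe dimension are both 200
# here, the output projection could share the embedding matrix like this:
# model.decoder.weight = model.encoder.weight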
from tqdm import tqdm

def train_epoch(epoch):
    """One epoch of a training loop"""
    epoch_loss = 0
    model.train()
    for batch in tqdm(train_iter):
        # reset the hidden state or else the model will try to backpropagate to the
        # beginning of the dataset, requiring lots of time and a lot of memory
        model.reset_history()
        optimizer.zero_grad()
        text, targets = batch.text.to(device), batch.target.to(device)
        prediction = model(text)
        # the cross entropy loss expects a 2-D (N, C) input here, so we flatten the
        # predictions across the sequence and batch axes so they become
        # shape (sequence_length * batch_size, n_tokens);
        # in accordance with this, we reshape the targets to be
        # shape (sequence_length * batch_size)
        loss = criterion(prediction.view(-1, n_tokens), targets.view(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * prediction.size(0) * prediction.size(1)
    epoch_loss /= len(train.examples[0].text)

    # monitor the loss on the validation set
    val_loss = 0
    model.eval()
    with torch.no_grad():
        for batch in valid_iter:
            model.reset_history()
            text, targets = batch.text.to(device), batch.target.to(device)
            prediction = model(text)
            loss = criterion(prediction.view(-1, n_tokens), targets.view(-1))
            # scale by the number of tokens in the batch so the normalisation
            # matches the training loss above
            val_loss += loss.item() * text.size(0) * text.size(1)
    val_loss /= len(valid.examples[0].text)
    print('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}'.format(epoch, epoch_loss, val_loss))

for epoch in range(1, n_epochs + 1):
    train_epoch(epoch)
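# Optional helper (illustrative, assumes the per-token averaging used in train_epoch):
# language-model losses are average per-token cross-entropies in nats, so perplexity is
# simply their exponential, e.g. a validation loss of 5.2 corresponds to exp(5.2) ≈ 181.
import math

def perplexity(avg_nll):
    """Convert an average per-token cross-entropy (in nats) to perplexity."""
    return math.exp(avg_nll)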
def word_ids_to_sentence(id_tensor, vocab, join=None):
    """Converts a sequence of word ids to a sentence"""
    if isinstance(id_tensor, np.ndarray):
        ids = id_tensor.transpose().reshape(-1)
    else:  # torch tensor (CPU or CUDA)
        ids = id_tensor.transpose(0, 1).contiguous().view(-1)
    batch = [vocab.itos[ind] for ind in ids]  # denumericalize
    if join is None:
        return batch
    return join.join(batch)
b = next(iter(valid_iter))
inputs_word = word_ids_to_sentence(b.text.to(device).data, TEXT.vocab, join=' ')[:210]
print(inputs_word)

arrs = model(b.text.to(device)).detach().cpu().numpy()
preds = word_ids_to_sentence(np.argmax(arrs, axis=2), TEXT.vocab, join=' ')[:210]
print(preds)
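# Optional follow-up (illustrative, reuses `b` and `arrs` from above): token-level accuracy
# of the greedy argmax predictions against the shifted targets for this validation batch.
pred_ids = np.argmax(arrs, axis=2)                      # (seq_len, batch_size)
accuracy = (pred_ids == b.target.cpu().numpy()).mean()  # targets have the same shape
print('greedy next-word accuracy on one validation batch: {:.3f}'.format(accuracy))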