# seq2seqModel.py
from __future__ import unicode_literals, print_function, division

import torch
import torch.nn as nn
import torch.nn.functional as F

from config import getConfig

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

gConfig = getConfig.get_config()

SOS_token = 0  # start-of-sequence token index
EOS_token = 1  # end-of-sequence token index
MAX_LENGTH = gConfig['max_length']
units = gConfig['layer_size']
BATCHSIZE = gConfig['batch_size']

# NLLLoss expects log-probabilities; the decoder below ends in log_softmax,
# so its output can be passed to this criterion directly.
criterion = nn.NLLLoss()
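# For reference, get_config() is assumed to return a dict with at least the
# keys read above, e.g. {'max_length': 20, 'layer_size': 256, 'batch_size': 32}
# (these example values are placeholders, not the project's actual settings).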
class Encoder(nn.Module):
    """Single-layer GRU encoder: embeds one token and advances the hidden state."""

    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        # Embed a single token and reshape to (seq_len=1, batch=1, hidden_size).
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
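# A minimal shape check for a single encoder step (comment sketch only; the
# vocabulary size of 10 is an arbitrary assumption, not a config value):
#
#     encoder = Encoder(input_size=10, hidden_size=units)
#     hidden = encoder.initHidden()
#     out, hidden = encoder(torch.tensor([3], device=device), hidden)
#     # out.shape == (1, 1, units); hidden.shape == (1, 1, units)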
class AttentionDecoder(nn.Module):
    """GRU decoder with attention over the encoder outputs."""

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttentionDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        # Attention weights over the encoder positions, computed from the
        # current input embedding and the previous hidden state via softmax.
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        # Weighted sum of encoder outputs:
        # (1, 1, max_length) x (1, max_length, hidden_size) -> (1, 1, hidden_size).
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        # Log-probabilities over the output vocabulary.
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
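# Illustrative single decoder step (comment sketch; the vocabulary size of 10
# and the zero-filled encoder outputs are assumptions made for illustration):
#
#     decoder = AttentionDecoder(units, output_size=10)
#     dec_hidden = decoder.initHidden()
#     enc_outs = torch.zeros(MAX_LENGTH, units, device=device)
#     out, dec_hidden, attn = decoder(
#         torch.tensor([[SOS_token]], device=device), dec_hidden, enc_outs)
#     # out: (1, 10) log-probabilities; attn: (1, MAX_LENGTH) attention weights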
# training method
def train_step(input_tensors, target_tensors, encoder, decoder,
               encoder_optimizer, decoder_optimizer):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0
    # Unpack each sample of the batch with a for loop.
    for i in range(BATCHSIZE):
        input_tensor = input_tensors[i]
        target_tensor = target_tensors[i]
        input_length = input_tensor.size(0)
        target_length = target_tensor.size(0)
        # Reset the hidden state so the samples in the batch stay independent.
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(MAX_LENGTH, encoder.hidden_size, device=device)
        # Feed the tokens to the encoder one step at a time, capped at MAX_LENGTH.
        for ei in range(min(MAX_LENGTH, input_length)):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        # Decode against the encoder outputs; the decoder already returns
        # log-probabilities, which is exactly what NLLLoss expects.
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di]) / BATCHSIZE
            # Teacher forcing: feed the ground-truth token as the next input.
            decoder_input = target_tensor[di]
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    # Normalized by the target length of the last sample in the batch.
    return loss.item() / target_length
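# A minimal smoke test, runnable as a script. The vocabulary size (10), the
# SGD learning rate, and the synthetic token sequences are arbitrary
# assumptions made for illustration; real training would use tokenized
# corpus data and the project's own training script.
if __name__ == "__main__":
    vocab_size = 10  # hypothetical vocabulary size, not taken from the config
    encoder = Encoder(vocab_size, units).to(device)
    decoder = AttentionDecoder(units, vocab_size).to(device)
    encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=0.01)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=0.01)
    # One synthetic (input, target) pair, repeated to fill the batch; each
    # tensor has shape (sequence_length, 1) and ends with EOS_token.
    sample_input = torch.tensor([[2], [5], [7], [EOS_token]], device=device)
    sample_target = torch.tensor([[4], [3], [EOS_token]], device=device)
    input_tensors = [sample_input] * BATCHSIZE
    target_tensors = [sample_target] * BATCHSIZE
    loss = train_step(input_tensors, target_tensors, encoder, decoder,
                      encoder_optimizer, decoder_optimizer)
    print("smoke-test loss:", loss)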