from tensorlayer.models.transformer.utils import metrics
from tensorlayer.models.transformer.utils import attention_visualisation
import tensorlayer as tl
-
-
""" Translation from Portuguese to English with a Transformer model
This tutorial provides basic instructions on how to define and train a Transformer model in TensorLayer for
a translation task. You can also learn how to visualize the attention block via this tutorial.
"""

+
def set_up_dataset():
    # Set up the dataset for Portuguese-English translation from the TED Talks Open Translation Project.
    # This dataset contains approximately 50000 training examples, 1100 validation examples, and 2000 test examples.
    # https://www.ted.com/participate/translate

-    examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True,
-                                   as_supervised=True)
+    examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True, as_supervised=True)
    train_examples, val_examples = examples['train'], examples['validation']

    # Set up the tokenizer and save it to disk
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
-        (en.numpy() and pt.numpy() for pt, en in train_examples), target_vocab_size=2**14)
+        # feed both the Portuguese and the English sentences so the shared subword vocabulary covers both languages
+        (s.numpy() for pt, en in train_examples for s in (pt, en)), target_vocab_size=2**14
+    )

    tokenizer.save_to_file("tokenizer")
    tokenizer = tfds.features.text.SubwordTextEncoder.load_from_file("tokenizer")
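    # the learned vocabulary is written to disk by save_to_file() and reloaded here, so later runs can reuse it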
@@ -38,44 +37,42 @@ def test_tokenizer_success(tokenizer):
    sample_string = 'TensorLayer is awesome.'

    tokenized_string = tokenizer.encode(sample_string)
-    print('Tokenized string is {}'.format(tokenized_string))
+    print('Tokenized string is {}'.format(tokenized_string))

    original_string = tokenizer.decode(tokenized_string)
-    print('The original string: {}'.format(original_string))
+    print('The original string: {}'.format(original_string))
    assert original_string == sample_string


-
def generate_training_dataset(train_examples, tokenizer):
+
    def encode(lang1, lang2):
-        lang1 = tokenizer.encode(
-            lang1.numpy()) + [tokenizer.vocab_size + 1]
+        lang1 = tokenizer.encode(lang1.numpy()) + [tokenizer.vocab_size + 1]
+
+        lang2 = tokenizer.encode(lang2.numpy()) + [tokenizer.vocab_size + 1]
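+        # tokenizer.vocab_size + 1 marks the end-of-sentence id; it matches HYPER_PARAMS.eos_id defined in model_setup()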

-        lang2 = tokenizer.encode(
-            lang2.numpy()) + [tokenizer.vocab_size + 1]
-
        return lang1, lang2
+
    MAX_LENGTH = 50
+
    def filter_max_length(x, y, max_length=MAX_LENGTH):
-        return tf.logical_and(tf.size(x) <= max_length,
-                              tf.size(y) <= max_length)
+        return tf.logical_and(tf.size(x) <= max_length, tf.size(y) <= max_length)
+
    def tf_encode(pt, en):
        return tf.py_function(encode, [pt, en], [tf.int64, tf.int64])
+
    train_dataset = train_examples.map(tf_encode)
    train_dataset = train_dataset.filter(filter_max_length)
    # cache the dataset to memory to get a speedup while reading from it.
    train_dataset = train_dataset.cache()
    BUFFER_SIZE = 20000
    BATCH_SIZE = 64
-    train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(
-        BATCH_SIZE, padded_shapes=([-1], [-1]))
+    train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE, padded_shapes=([-1], [-1]))
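+    # padded_shapes=([-1], [-1]) pads every sequence in a batch with zeros up to the longest sequence in that batch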
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_dataset


-
-
def model_setup(tokenizer):
    # define the hyperparameters for the Transformer
    class HYPER_PARAMS(object):
@@ -91,16 +88,14 @@ class HYPER_PARAMS(object):
        extra_decode_length = 50
        beam_size = 5
        alpha = 0.6  # used to calculate length normalization in beam search
-
-
-        label_smoothing = 0.1
-        learning_rate = 2.0
-        learning_rate_decay_rate = 1.0
-        learning_rate_warmup_steps = 4000
-
-        sos_id = 0
-        eos_id = tokenizer.vocab_size + 1

+        label_smoothing = 0.1
+        learning_rate = 2.0
+        learning_rate_decay_rate = 1.0
+        learning_rate_warmup_steps = 4000
+
+        sos_id = 0
+        eos_id = tokenizer.vocab_size + 1

    model = Transformer(HYPER_PARAMS)

@@ -112,20 +107,20 @@ class HYPER_PARAMS(object):

    # Use the Adam optimizer with a custom learning rate schedule, following the formula in the paper "Attention Is All You Need"
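    # i.e. lrate = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)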
    class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
-        def __init__(self, d_model, warmup_steps=5):
-            super(CustomSchedule, self).__init__()
-
-            self.d_model = d_model
-            self.d_model = tf.cast(self.d_model, tf.float32)
-
-            self.warmup_steps = warmup_steps
-
-        def __call__(self, step):
-            arg1 = tf.math.rsqrt(step)
-            arg2 = step * (self.warmup_steps ** -1.5)
-
-            return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
+        def __init__(self, d_model, warmup_steps=5):
+            super(CustomSchedule, self).__init__()
+
+            self.d_model = d_model
+            self.d_model = tf.cast(self.d_model, tf.float32)
+
+            self.warmup_steps = warmup_steps
+
+        def __call__(self, step):
+            arg1 = tf.math.rsqrt(step)
+            arg2 = step * (self.warmup_steps ** -1.5)
+
+            return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)


def tutorial_transformer():
@@ -146,23 +141,17 @@ def tutorial_transformer():
            if batch % 50 == 0:
                print('Batch ID {} at Epoch [{}/{}]: loss {:.4f}'.format(batch, epoch + 1, num_epochs, loss))

-
-
    model.eval()
    sentence_en = tokenizer.encode('TensorLayer is awesome.')
    [prediction, weights_decoder], weights_encoder = model(inputs=[sentence_en])

-    predicted_sentence = tokenizer.decode([i for i in prediction["outputs"][0]
-                                           if i < tokenizer.vocab_size])
+    predicted_sentence = tokenizer.decode([i for i in prediction["outputs"][0] if i < tokenizer.vocab_size])
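+    # ids >= tokenizer.vocab_size are the end-of-sentence marker and are dropped before decoding back to text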
    print("Translated: ", predicted_sentence)

-
-    # visualize the self attention
+    # visualize the self-attention
    tokenizer_str = [tokenizer.decode([ts]) for ts in sentence_en]
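    # decode each subword id individually; these per-token strings label the axes of the attention plot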
    attention_visualisation.plot_attention_weights(weights_encoder["layer_0"], tokenizer_str, tokenizer_str)

-
-

if __name__ == "__main__":
    tutorial_transformer()