Skip to content

Commit 80c985c

Browse files
author
Lingjun Liu
committed
doc
1 parent e0e81f0 commit 80c985c

File tree

6 files changed

+50
-176
lines changed

6 files changed

+50
-176
lines changed

tensorlayer/models/transformer/attention_layer.py

+2-39
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def get_config(self):
6060
}
6161

6262
def build(self, inputs_shape):
63+
6364
# Transformation for linearly projecting the queries, keys, and values.
6465
self.q_transformation = self._get_weights(
6566
"q_project", shape=(self.hidden_size, self.hidden_size), init=tf.initializers.get('glorot_uniform')
@@ -75,20 +76,7 @@ def build(self, inputs_shape):
7576
)
7677

7778
def split_heads(self, x):
78-
"""Split x into different heads, and transpose the resulting value.
79-
80-
The tensor is transposed to ensure the inner dimensions hold the correct
81-
values during the matrix multiplication.
8279

83-
Parameters
84-
-----------
85-
86-
x: A tensor with shape [batch_size, length, hidden_size]
87-
88-
Returns:
89-
-----------
90-
A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]
91-
"""
9280
with tf.name_scope("split_heads"):
9381
batch_size = tf.shape(x)[0]
9482
length = tf.shape(x)[1]
@@ -103,40 +91,15 @@ def split_heads(self, x):
10391
return tf.transpose(x, [0, 2, 1, 3])
10492

10593
def combine_heads(self, x):
106-
"""Combine tensor that has been split.
107-
108-
Args:
109-
x: A tensor [batch_size, num_heads, length, hidden_size/num_heads]
11094

111-
Returns:
112-
-----------
113-
A tensor with shape [batch_size, length, hidden_size]
114-
"""
11595
with tf.name_scope("combine_heads"):
11696
batch_size = tf.shape(x)[0]
11797
length = tf.shape(x)[2]
11898
x = tf.transpose(x, [0, 2, 1, 3]) # --> [batch, length, num_heads, depth]
11999
return tf.reshape(x, [batch_size, length, self.hidden_size])
120100

121101
def forward(self, x, y, mask, cache=None):
122-
"""Apply attention mechanism to x and y.
123-
124-
Args:
125-
x: a tensor with shape [batch_size, length_x, hidden_size]
126-
y: a tensor with shape [batch_size, length_y, hidden_size]
127-
mask: attention bias that will be added to the result of the dot product.
128-
training: boolean, whether in training mode or not.
129-
cache: (Used during prediction) dictionary with tensors containing results
130-
of previous attentions. The dictionary must have the items:
131-
{"k": tensor with shape [batch_size, i, key_channels],
132-
"v": tensor with shape [batch_size, i, value_channels]}
133-
where i is the current decoded length.
134-
135-
Returns:
136-
-----------
137-
Attention layer output with shape [batch_size, length_x, hidden_size]
138-
Attention weights with shape [batch_size, number_of_head, length_x, length_y]
139-
"""
102+
"""Apply attention mechanism to x and y."""
140103
# Linearly project the query (q), key (k) and value (v) using different
141104
# learned projections. This is in preparation of splitting them into
142105
# multiple heads. Multi-head attention uses multiple queries, keys, and

tensorlayer/models/transformer/beamsearchHelper/beam_search.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,11 @@ def sequence_beam_search(
7272
eos_id: int
7373
id of eos token, used to determine when a sequence has finished
7474
75-
Returns
75+
Notes
7676
-------
77-
Top decoded sequences [batch_size, beam_size, max_decode_length]
78-
sequence scores [batch_size, beam_size]
77+
The function returns:
78+
Top decoded sequences [batch_size, beam_size, max_decode_length]
79+
sequence scores [batch_size, beam_size]
7980
"""
8081

8182
batch_size = tf.shape(initial_ids)[0]

tensorlayer/models/transformer/beamsearchHelper/beam_search_v1.py

+4-38
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,6 @@ def _continue_search(self, state):
166166
2) when the worst score in the finished sequences is better than the best
167167
score in the alive sequences (i.e. the finished sequences are provably
168168
unchanging)
169-
170-
Parameters
171-
-----------
172-
state: A dictionary with the current loop state.
173-
174-
Returns:
175-
-----------
176-
Bool tensor with value True if loop should continue, False if loop should
177-
terminate.
178169
"""
179170
i = state[_StateKeys.CUR_INDEX]
180171
alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS]
@@ -216,13 +207,6 @@ def _search_step(self, state):
216207
by the length normalization factor. Without length normalization, the
217208
search is more likely to return shorter sequences.
218209
219-
Parameters
220-
-----------
221-
state: A dictionary with the current loop state.
222-
223-
Returns:
224-
-----------
225-
new state dictionary.
226210
"""
227211
# Grow alive sequences by one token.
228212
new_seq, new_log_probs, new_cache = self._grow_alive_seq(state)
@@ -241,20 +225,9 @@ def _search_step(self, state):
241225

242226
def _grow_alive_seq(self, state):
243227
"""Grow alive sequences by one token, and collect top 2*beam_size sequences.
244-
245228
2*beam_size sequences are collected because some sequences may have reached
246229
the EOS token. 2*beam_size ensures that at least beam_size sequences are
247230
still alive.
248-
249-
Parameters
250-
-----------
251-
state: A dictionary with the current loop state.
252-
Returns:
253-
-----------
254-
Tuple of
255-
(Top 2*beam_size sequences [batch_size, 2 * beam_size, cur_index + 1],
256-
Scores of returned sequences [batch_size, 2 * beam_size],
257-
New alive cache, for each of the 2 * beam_size sequences)
258231
"""
259232
i = state[_StateKeys.CUR_INDEX]
260233
alive_seq = state[_StateKeys.ALIVE_SEQ]
@@ -384,10 +357,11 @@ def sequence_beam_search(
384357
eos_id: int
385358
id of eos token, used to determine when a sequence has finished
386359
387-
Returns
360+
Notes
388361
-------
389-
Top decoded sequences [batch_size, beam_size, max_decode_length]
390-
sequence scores [batch_size, beam_size]
362+
The function returns:
363+
Top decoded sequences [batch_size, beam_size, max_decode_length]
364+
sequence scores [batch_size, beam_size]
391365
"""
392366
batch_size = tf.shape(initial_ids)[0]
393367
sbs = SequenceBeamSearch(symbols_to_logits_fn, vocab_size, batch_size, beam_size, alpha, max_decode_length, eos_id)
@@ -449,14 +423,6 @@ def _get_shape_keep_last_dim(tensor):
449423

450424
def _flatten_beam_dim(tensor):
451425
"""Reshapes first two dimensions in to single dimension.
452-
453-
Parameters
454-
-----------
455-
tensor: Tensor to reshape of shape [A, B, ...]
456-
457-
Returns
458-
-----------
459-
Reshaped tensor of shape [A*B, ...]
460426
"""
461427
shape = _shape_list(tensor)
462428
shape[0] *= shape[1]

tensorlayer/models/transformer/embedding_layer.py

+6-25
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ def __init__(self, vocab_size, hidden_size):
3030
3131
Parameters
3232
-----------
33-
vocab_size: Number of tokens in the embedding. (Typically ~32,000)
34-
hidden_size: Dimensionality of the embedding. (Typically 512 or 1024)
33+
vocab_size : int
34+
Number of tokens in the embedding. (Typically ~32,000)
35+
hidden_size : int
36+
Dimensionality of the embedding. (Typically 512 or 1024)
3537
"""
3638
super(EmbeddingLayer, self).__init__()
3739
self.vocab_size = vocab_size
@@ -56,20 +58,7 @@ def get_config(self):
5658
}
5759

5860
def forward(self, inputs, mode="embedding"):
59-
"""Get token embeddings of inputs.
60-
61-
Parameters
62-
-----------
63-
inputs: An int64 tensor with shape [batch_size, length]
64-
mode: string, a valid value is one of "embedding" and "linear".
65-
Returns:
66-
-----------
67-
outputs: (1) If mode == "embedding", output embedding tensor, float32 with
68-
shape [batch_size, length, embedding_size]; (2) mode == "linear", output
69-
linear tensor, float32 with shape [batch_size, length, vocab_size].
70-
Raises:
71-
ValueError: if mode is not valid.
72-
"""
61+
"""Get token embeddings of inputs."""
7362
if mode == "embedding":
7463
return self._embedding(inputs)
7564
elif mode == "linear":
@@ -89,15 +78,7 @@ def _embedding(self, inputs):
8978
return embeddings
9079

9180
def _linear(self, inputs):
92-
"""Computes logits by running inputs through a linear layer.
93-
94-
Parameters
95-
-----------
96-
inputs: A float32 tensor with shape [batch_size, length, hidden_size]
97-
Returns:
98-
-----------
99-
float32 tensor with shape [batch_size, length, vocab_size].
100-
"""
81+
"""Computes logits by running inputs through a linear layer."""
10182
with tf.name_scope("presoftmax_linear"):
10283
batch_size = tf.shape(inputs)[0]
10384
length = tf.shape(inputs)[1]

tensorlayer/models/transformer/feedforward_layer.py

+7-15
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ def __init__(self, hidden_size, filter_size, keep_prob):
3030
3131
Parameters
3232
-----------
33-
hidden_size: int, output dim of hidden layer.
34-
filter_size: int, filter size for the inner (first) dense layer.
35-
relu_dropout: float, dropout rate for training.
33+
hidden_size: int
34+
output dim of hidden layer.
35+
filter_size: int
36+
filter size for the inner (first) dense layer.
37+
relu_dropout: float
38+
dropout rate for training.
3639
"""
3740
super(TransformerFeedForwardLayer, self).__init__()
3841
self.hidden_size = hidden_size
@@ -60,18 +63,7 @@ def get_config(self):
6063
}
6164

6265
def forward(self, inputs):
63-
"""Return outputs of the feedforward network.
64-
65-
Parameters
66-
-----------
67-
x: tensor with shape [batch_size, length, hidden_size]
68-
training: boolean, whether in training mode or not.
69-
70-
Returns:
71-
-----------
72-
Output of the feedforward network.
73-
tensor with shape [batch_size, length, hidden_size]
74-
"""
66+
"""Return outputs of the feedforward network."""
7567
# Retrieve dynamically known shapes
7668
x = inputs
7769
batch_size = tf.shape(x)[0]

0 commit comments

Comments
 (0)