Skip to content

Commit 83c6c72

Browse files
lingvo-bot authored and copybara-github committed
Create additional functions for causal padding in BaseConv2DLayer.
This change simplifies changes to causal padding after inheritance but does not change the layer itself. PiperOrigin-RevId: 643141393
1 parent 2442e5b commit 83c6c72

File tree

1 file changed

+39
-29
lines changed

1 file changed

+39
-29
lines changed

lingvo/core/layers.py

Lines changed: 39 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -454,6 +454,44 @@ def _GetWeights(self,
454454
b = (beta - (gamma * mean * sigma_recip))
455455
return filter_w, b
456456

457+
def _ApplyCausalPadding(self, inputs):
  """Pads `inputs` so a VALID convolution behaves like a causal one.

  Axis 1 (the time/height axis) is padded only at the front, so the first
  output depends only on the first input. Axis 2 (the width axis) is padded
  on both sides so the output width is the same as with SAME padding.
  Axes 0 and 3 are left untouched.

  Args:
    inputs: The tensor to pad. The padding spec has four entries, so a
      rank-4 tensor is expected (presumably [batch, time, width, channels];
      confirm against callers).

  Returns:
    The result of `tf.pad` applied to `inputs` with the computed padding.

  Raises:
    ValueError: If both the width stride and the filter width exceed 1.
  """
  params = self.params

  # A dilated filter spans (kernel - 1) * dilation_rate + 1 input positions
  # (see https://www.tensorflow.org/api_docs/python/tf/nn/convolution), so
  # (kernel - 1) * dilation_rate leading positions are needed in time.
  time_pad_front = (params.filter_shape[0] - 1) * params.dilation_rate[0]

  # Same formula along the width axis gives the total padding required to
  # keep the output width equal to what SAME padding would produce.
  width_pad_total = (params.filter_shape[1] - 1) * params.dilation_rate[1]

  # With stride > 1, SAME padding needs
  #   pad = ceil(input_size / stride - 1) * stride + eff_kernel - input_size
  # in total, and TensorFlow puts the extra unit on the right / bottom when
  # that total is odd (pad_left = pad // 2). The left amount would then
  # depend on a possibly dynamic input size, so only two cases are handled:
  #   1) stride = 1, and
  #   2) kernel = 1, where no width padding is needed at all.
  width_is_strided = params.filter_stride[1] > 1
  if width_is_strided and params.filter_shape[1] > 1:
    raise ValueError(
        'Causal convolution only supports width stride = 1 or filter width = 1.'
    )

  # NOTE(review): for stride = 1 the original comment gives
  # pad_left = (eff_kernel - 1) // 2, which equals width_pad_total // 2.
  # The expression below yields a smaller left pad for odd filter widths
  # (e.g. 0 instead of 1 for width 3, dilation 1). Kept as-is to preserve
  # existing behavior; confirm whether this is intended.
  left = max(0, width_pad_total - 1) // 2
  right = width_pad_total - left

  paddings = [[0, 0], [time_pad_front, 0], [left, right], [0, 0]]
  return tf.pad(inputs, paddings)
494+
457495
def _ApplyConv(self, theta, inputs, folded_bn_padding=None):
458496
p = self.params
459497
strides = [p.filter_stride[0], p.filter_stride[1]]
@@ -462,42 +500,14 @@ def _ApplyConv(self, theta, inputs, folded_bn_padding=None):
462500
if dtype != tf.float32:
463501
cast_dtype = tf.float32
464502
inputs = tf.cast(inputs, cast_dtype)
465-
466503
padding_algorithm = 'SAME'
467504
if p.causal_convolution:
468505
# Causal convolution is only applied in time (height) dimension.
469506
# Use VALID padding and shift the inputs to the right to ensure that the
470507
# first output only depends on the first input and so on. The output is
471508
# the same size as the input, as if the convolution used SAME padding.
472509
padding_algorithm = 'VALID'
473-
# The effective spatial filter size for dilated convolutions is
474-
# (kernel - 1) * dilation_rate + 1 as according to
475-
# https://www.tensorflow.org/api_docs/python/tf/nn/convolution.
476-
causal_pad_size = (p.filter_shape[0] - 1) * p.dilation_rate[0]
477-
478-
# Apply padding in width dimension to mimic SAME padding.
479-
# Using the similar logic as above to produce the same number of output
480-
# as if SAME padding is used.
481-
width_pad_size = (p.filter_shape[1] - 1) * p.dilation_rate[1]
482-
483-
# The amount of padding on the left is tricky. If stride > 1, total
484-
# padding required for SAME padding would be:
485-
# pad = ceil(input_size / stride - 1) * stride + eff_kernel - input_size
486-
# where eff_kernel = (kernel - 1) * dilation_rate + 1
487-
# TensorFlow also pads more on the right / bottom side if total padding
488-
# required is an odd number, so pad_left = pad // 2
489-
# Therefore pad_left could depend on input size, which might be dynamic.
490-
# Here we only handle two special cases where 1) stride = 1, then
491-
# pad_left = (eff_kernel - 1) // 2
492-
# and 2) kernel = 1, then
493-
# pad_left = 0
494-
if p.filter_stride[1] > 1 and p.filter_shape[1] > 1:
495-
raise ValueError('Causal convolution only supports width stride = 1 '
496-
'or filter width = 1.')
497-
width_pad_left = max(0, width_pad_size - 1) // 2
498-
width_pad_right = width_pad_size - width_pad_left
499-
inputs = tf.pad(inputs, [[0, 0], [causal_pad_size, 0],
500-
[width_pad_left, width_pad_right], [0, 0]])
510+
inputs = self._ApplyCausalPadding(inputs)
501511

502512
# Lambda for computing the actual convolution.
503513
def ComputeRawConvolution(filter_w):

0 commit comments

Comments
 (0)