Commit b9cc6d8

eladebanmn-robot authored and committed

Adds low-level support for Conv3D.

Note that 3D convolutions are not yet supported by network_regularizer.

PiperOrigin-RevId: 248762137

1 parent 537bb9d commit b9cc6d8

4 files changed: +101 −63 lines changed
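For orientation, here is a minimal usage sketch of what this commit enables. It is not part of the commit itself, and assumes the TF1-style tf.contrib.layers API used by this repo's tests:

import tensorflow as tf

from morph_net.network_regularizers import cost_calculator
from morph_net.network_regularizers import resource_function

layers = tf.contrib.layers

# A tiny NDHWC video batch: [batch, depth, height, width, channels].
video = tf.zeros([1, 15, 12, 13, 17])
layers.conv3d(video, 10, [7, 5, 3], stride=2, padding='SAME', scope='conv')
conv3d_op = tf.get_default_graph().get_operation_by_name('conv/Conv3D')

# Newly supported: the FLOP coefficient of a Conv3D op (multiply by input
# depth and output depth to get the total FLOP count).
print(resource_function.flop_coeff(conv3d_op))

# Renamed from _get_input and made public: the activation input of an op.
print(cost_calculator.get_input_activation(conv3d_op))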

morph_net/network_regularizers/cost_calculator.py

Lines changed: 10 additions & 6 deletions

@@ -8,7 +8,9 @@
 CONV2D_OPS = ('Conv2D', 'Conv2DBackpropInput', 'DepthwiseConv2dNative')
-FLOP_OPS = CONV2D_OPS + ('MatMul',)
+CONV3D_OPS = ('Conv3D',)
+CONV_OPS = CONV2D_OPS + CONV3D_OPS
+FLOP_OPS = CONV_OPS + ('MatMul',)
 SUPPORTED_OPS = FLOP_OPS + (
     'Add', 'AddN', 'ConcatV2', 'FusedBatchNorm', 'Mul', 'Relu', 'Relu6', 'Sum')

@@ -60,7 +62,7 @@ def _get_cost_or_regularization_term(self, is_regularization, ops=None):
        continue

      # Get regularization and alive terms for input and output.
-      input_tensor = _get_input(op)
+      input_tensor = get_input_activation(op)
      if op.type == 'ConcatV2':
        # For concat, the input and output regularization are identical but the
        # input is composed of multiple concatenated regularizers. Thus, just

@@ -110,8 +112,8 @@ def get_regularization_term(self, ops=None):
     return self._get_cost_or_regularization_term(True, ops)


-def _get_input(op):
-  """Returns the input to that op that represents the activations.
+def get_input_activation(op):
+  """Returns the input to `op` that represents the activations.

   (as opposed to e.g. weights.)

@@ -122,10 +124,12 @@ def _get_input(op):
     A tf.Tensor representing the input activations.

   Raises:
+    ValueError: op type not supported.
     ValueError: MatMul is used with transposition (unsupported).
   """
-  assert op.type in SUPPORTED_OPS, 'Op type %s is not supported.' % op.type
-  if op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
+  if op.type not in SUPPORTED_OPS:
+    raise ValueError('Op type %s is not supported.' % op.type)
+  if op.type in ('Conv3D', 'Conv2D', 'DepthwiseConv2dNative'):
     return op.inputs[0]
   if op.type == 'Conv2DBackpropInput':
     return op.inputs[2]
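The `inputs[2]` case exists because a Conv2DBackpropInput op (the op behind conv2d_transpose) takes its inputs in the order (output sizes, filter, activations), so the activation tensor is the third input. A small sketch to inspect this, with illustrative shapes:

import tensorflow as tf

layers = tf.contrib.layers

image = tf.zeros([1, 11, 13, 17])
layers.conv2d_transpose(image, 29, [7, 5], stride=2, padding='SAME', scope='convt2')

op = tf.get_default_graph().get_operation_by_name('convt2/conv2d_transpose')
for i, tensor in enumerate(op.inputs):
  # Expected order: [0] target output sizes, [1] filter, [2] activations.
  print(i, tensor.name, tensor.shape)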

morph_net/network_regularizers/cost_calculator_test.py

Lines changed: 30 additions & 4 deletions

@@ -5,12 +5,13 @@
 from __future__ import print_function

 import collections
+from absl.testing import parameterized
 from morph_net.framework import batch_norm_source_op_handler
 from morph_net.framework import concat_op_handler
 from morph_net.framework import grouping_op_handler
 from morph_net.framework import op_regularizer_manager as orm
 from morph_net.framework import output_non_passthrough_op_handler
-from morph_net.network_regularizers import cost_calculator
+from morph_net.network_regularizers import cost_calculator as cc
 from morph_net.network_regularizers import resource_function
 from morph_net.testing import add_concat_model_stub
 import tensorflow as tf

@@ -19,7 +20,7 @@
 layers = tf.contrib.layers


-class NetworkRegularizerTest(tf.test.TestCase):
+class CostCalculatorTest(parameterized.TestCase, tf.test.TestCase):

   def _batch_norm_scope(self):
     params = {

@@ -70,8 +71,7 @@ def testImageIsNotZerothOutputOfOp(self):

     # Create OpRegularizerManager and NetworkRegularizer for test.
     manager = orm.OpRegularizerManager([output_op], op_handler_dict)
-    calculator = cost_calculator.CostCalculator(
-        manager, resource_function.flop_function)
+    calculator = cc.CostCalculator(manager, resource_function.flop_function)

     # Calculate expected FLOP cost.
     expected_alive_conv1 = sum(add_concat_model_stub.expected_alive()['conv1'])

@@ -92,6 +92,32 @@ def testImageIsNotZerothOutputOfOp(self):
      queue.enqueue((non_image_tensor, image)).run()
      self.assertEqual(expected_cost,
                       calculator.get_cost([conv1_op]).eval())
+      # For 0/1 assignments, cost and reg_term are equal:
+      self.assertEqual(expected_cost,
+                       calculator.get_regularization_term([conv1_op]).eval())
+
+  @parameterized.named_parameters(
+      ('_conv2d', 4, lambda x: layers.conv2d(x, 16, 3), 'Conv2D'),
+      ('_convt', 4, lambda x: layers.conv2d_transpose(x, 16, 3),
+       'conv2d_transpose'),
+      ('_conv2s', 4, lambda x: layers.separable_conv2d(x, None, 3),
+       'depthwise'),
+      ('_conv3d', 5, lambda x: layers.conv3d(x, 16, 3), 'Conv3D'))
+  def test_get_input_activation2(self, rank, fn, op_name):
+    g = tf.get_default_graph()
+    inputs = tf.zeros([6] * rank)
+    with arg_scope([
+        layers.conv2d, layers.conv2d_transpose, layers.separable_conv2d,
+        layers.conv3d
+    ],
+                   scope='test_layer'):
+      _ = fn(inputs)
+    for op in g.get_operations():
+      print(op.name)
+    self.assertEqual(
+        inputs,
+        cc.get_input_activation(
+            g.get_operation_by_name('test_layer/' + op_name)))


 if __name__ == '__main__':

morph_net/network_regularizers/resource_function.py

Lines changed: 33 additions & 43 deletions

@@ -5,6 +5,8 @@
 from __future__ import print_function
 from morph_net.framework import op_handler_util
 from morph_net.network_regularizers import cost_calculator
+
+import numpy as np
 import tensorflow as tf

 # Data sheet for K80:
@@ -54,15 +56,17 @@ def flop_coeff(op):
   have one multiplication and one addition for each convolution weight and
   pixel. This function returns C.

+  Supported operation names are listed in cost_calculator.FLOP_OPS.
+
   Args:
-    op: A tf.Operation of type 'Conv2D' or 'MatMul'.
+    op: A tf.Operation of a supported type.

   Returns:
     A float, the coefficient that when multiplied by the input depth and by the
     output depth gives the number of flops needed to compute the convolution.

   Raises:
-    ValueError: conv_op is not a tf.Operation of type Conv2D.
+    ValueError: op is not a supported tf.Operation.
   """
   if not is_flop_op(op):
     return 0.0
@@ -72,24 +76,35 @@ def flop_coeff(op):
     return 2.0
   # Looking at the output shape makes it easy to automatically take into
   # account strides and the type of padding.
-  if op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
-    shape = op.outputs[0].shape.dims
-    tensor_shape = tf.shape(op.outputs[0])
+  def kernel_num_elements(tensor):
+    """Returns the number of spatial elements of `tensor`.
+
+    Args:
+      tensor: The output (or, for transposed convolution, input) tensor.
+
+    Returns:
+      The number of spatial elements: a Python number if the shape is fully
+      known, otherwise a scalar float tf.Tensor.
+    """
+    num_elements = np.prod(tensor.shape.dims[1:-1]).value
+    if num_elements:
+      return num_elements
+    return tf.to_float(tf.reduce_prod(tf.shape(tensor)[1:-1]))
+
+  if op.type in ('Conv2D', 'DepthwiseConv2dNative', 'Conv3D'):
+    num_elements = kernel_num_elements(op.outputs[0])
   elif op.type == 'Conv2DBackpropInput':
     # For a transposed convolution, the input and the output are swapped (as
     # far as shapes are concerned). In other words, for a given filter shape
     # and stride, if Conv2D maps from shapeX to shapeY, Conv2DBackpropInput
     # maps from shapeY to shapeX. Therefore wherever we use the output shape
     # for Conv2D, we use the input shape for Conv2DBackpropInput.
-    input_tensor = _get_input(op)
-    shape = input_tensor.shape.dims
-    tensor_shape = tf.shape(input_tensor)
-
+    num_elements = kernel_num_elements(cost_calculator.get_input_activation(op))
+  else:
+    # Can only happen if elements are added to FLOP_OPS and not taken care of.
+    assert False, '%s in cost_calculator.FLOP_OPS but not handled' % op.type
   # Handle dynamic shaping while keeping the old code path, to not break
   # other clients.
-  size = shape[1] * shape[2]
-  size = size.value or tf.to_float(tensor_shape[1] * tensor_shape[2])
-  return 2.0 * size * _get_conv_filter_size(op)
+  return 2.0 * num_elements * _get_conv_filter_size(op)


 def num_weights_coeff(op):
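The static/dynamic split in kernel_num_elements works because, under TF1 shape semantics, multiplying Dimension objects that include an unknown dimension yields a Dimension whose .value is None, which is falsy. A sketch of both code paths, with illustrative shapes:

import numpy as np
import tensorflow as tf

# Fully static shape: np.prod over the Dimension objects gives a known value.
static = tf.zeros([1, 8, 6, 7, 10])
print(np.prod(static.shape.dims[1:-1]).value)  # 8 * 6 * 7 = 336

# Partially unknown shape: the product Dimension has value None (falsy), so
# flop_coeff falls back to a graph-time tf.reduce_prod over tf.shape.
dynamic = tf.placeholder(tf.float32, [1, None, None, None, 10])
print(np.prod(dynamic.shape.dims[1:-1]).value)  # None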
@@ -107,7 +122,7 @@ def num_weights_coeff(op):
   """
   if not is_flop_op(op):
     return 0.0
-  return (_get_conv_filter_size(op) if op.type in cost_calculator.CONV2D_OPS
+  return (_get_conv_filter_size(op) if op.type in cost_calculator.CONV_OPS
           else 1.0)
@@ -420,37 +435,12 @@ def is_flop_op(op):


 def _get_conv_filter_size(conv_op):
-  assert conv_op.type in cost_calculator.CONV2D_OPS
+  # Works for both 2D and 3D convolutions, whose weight tensors are 4D or 5D
+  # respectively: [kernel_size[:], input_depth, output_depth].
+  assert conv_op.type in cost_calculator.CONV_OPS
   conv_weights = conv_op.inputs[1]
-  filter_shape = conv_weights.shape.as_list()[:2]
-  return filter_shape[0] * filter_shape[1]
-
-
-def _get_input(op):
-  """Returns the input to that op that represents the activations.
-
-  Specifically, return the activation tensor rather than the weight tensor.
-
-  Args:
-    op: A tf.Operation object with type in _SUPPORTED_OPS.
-
-  Returns:
-    A tf.Tensor representing the input activations.
-
-  Raises:
-    ValueError: MatMul is used with transposition (unsupported).
-  """
-  assert op.type in cost_calculator.SUPPORTED_OPS, (
-      'Op type %s is not supported.' % op.type)
-  if op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
-    return op.inputs[0]
-  if op.type == 'Conv2DBackpropInput':
-    return op.inputs[2]
-  if op.type == 'MatMul':
-    if op.get_attr('transpose_a') or op.get_attr('transpose_b'):
-      raise ValueError('MatMul with transposition is not yet supported.')
-    return op.inputs[0]
-  return op.inputs[0]
+  filter_shape = conv_weights.shape.as_list()[:-2]
+  return np.prod(filter_shape)


 def _calculate_bilinear_regularization(
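The [:-2] slice is what makes _get_conv_filter_size rank-agnostic: dropping the trailing input/output depth dimensions leaves exactly the spatial kernel dimensions, whether there are two or three of them. A quick illustration with hypothetical shapes:

import numpy as np

# Conv2D weights: [kernel_h, kernel_w, input_depth, output_depth].
print(np.prod([7, 5, 17, 19][:-2]))     # 35 spatial taps per filter.

# Conv3D weights: [kernel_d, kernel_h, kernel_w, input_depth, output_depth].
print(np.prod([7, 5, 3, 17, 19][:-2]))  # 105 spatial taps per filter.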

morph_net/network_regularizers/resource_function_test.py

Lines changed: 28 additions & 10 deletions

@@ -34,13 +34,18 @@ def setUp(self):
     layers.separable_conv2d(
         net, None, [3, 2], depth_multiplier=1, padding='SAME', scope='dw1')

-    self.conv_op = tf.get_default_graph().get_operation_by_name('conv1/Conv2D')
-    self.convt_op = tf.get_default_graph().get_operation_by_name(
+    self.video_shape = (1, 11, 9, 13, 17)
+    self.video = tf.placeholder(tf.float32, shape=[1, None, None, None, 17])
+    net = layers.conv3d(
+        self.video, 19, [7, 3, 5], stride=2, padding='SAME', scope='vconv1')
+    g = tf.get_default_graph()
+    self.conv_op = g.get_operation_by_name('conv1/Conv2D')
+    self.convt_op = g.get_operation_by_name(
         'convt2/conv2d_transpose')
-    self.matmul_op = tf.get_default_graph().get_operation_by_name(
-        'FC/MatMul')
-    self.dw_op = tf.get_default_graph().get_operation_by_name(
-        'dw1/depthwise')
+    self.matmul_op = g.get_operation_by_name('FC/MatMul')
+    self.dw_op = g.get_operation_by_name('dw1/depthwise')
+    self.conv3d_op = g.get_operation_by_name(
+        'vconv1/Conv3D')

   @parameterized.named_parameters(
       ('_BatchSize1_AliveIn17_AliveOut19', 1, 17, 19),

@@ -1149,7 +1154,7 @@ def testBadHardware(self):
     _ = resource_function.latency_function_factory(None, 11)

   def testConvFlopsCoeff(self):
-    tf.reset_default_graph()
+    tf.compat.v1.reset_default_graph()
     image = tf.constant(0.0, shape=[1, 11, 13, 17])
     layers.conv2d(image, 19, [7, 5], stride=2, padding='SAME', scope='conv1')
     conv_op = tf.get_default_graph().get_operation_by_name('conv1/Conv2D')

@@ -1159,7 +1164,7 @@ def testConvFlopsCoeff(self):
     self.assertNearRelatively(expected_coeff, actual_coeff)

   def testConvFlopsCoeffUnknownShape(self):
-    tf.reset_default_graph()
+    tf.compat.v1.reset_default_graph()
     image = tf.placeholder(tf.float32, shape=[1, None, None, 17])
     net = layers.conv2d(
         image, 19, [7, 5], stride=2, padding='SAME', scope='conv1')

@@ -1176,7 +1181,7 @@ def testConvFlopsCoeffUnknownShape(self):
     self.assertNearRelatively(expected_coeff, actual_coeff)

   def testConvTransposeFlopsCoeff(self):
-    tf.reset_default_graph()
+    tf.compat.v1.reset_default_graph()
     image = tf.constant(0.0, shape=[1, 11, 13, 17])
     layers.conv2d_transpose(
         image, 29, [7, 5], stride=2, padding='SAME', scope='convt2')

@@ -1204,7 +1209,7 @@ def testFcNumWeightsCoeff(self):
     self.assertNearRelatively(1.0, actual_coeff)

   def testDepthwiseConvFlopsCoeff(self):
-    tf.reset_default_graph()
+    tf.compat.v1.reset_default_graph()
     image = tf.constant(0.0, shape=[1, 11, 13, 17])
     net = layers.conv2d(
         image, 10, [7, 5], stride=2, padding='SAME', scope='conv2')

@@ -1218,6 +1223,19 @@ def testDepthwiseConvFlopsCoeff(self):
     actual_coeff = resource_function.flop_coeff(dw_op)
     self.assertNearRelatively(expected_coeff, actual_coeff)

+  def test_conv3d_flops_coeff(self):
+    tf.compat.v1.reset_default_graph()
+    input_depth = 17
+    output_depth = 10
+    video = tf.zeros([1, 15, 12, 13, input_depth])
+    _ = layers.conv3d(
+        video, output_depth, [7, 5, 3], stride=2, padding='SAME', scope='conv')
+    conv_op = tf.get_default_graph().get_operation_by_name('conv/Conv3D')
+    # Divide by the input depth and the output depth to get the coefficient.
+    expected_coeff = _flops(conv_op) / (input_depth * output_depth)
+    actual_coeff = resource_function.flop_coeff(conv_op)
+    self.assertNearRelatively(expected_coeff, actual_coeff)
+

 def _flops(op):
   """Get the number of flops of a convolution, from the ops stats registry.
