From a9a4e5e7956644b8a925e190e2a9f6ddec930c62 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Mon, 12 Aug 2019 19:13:16 -0400 Subject: [PATCH 1/2] Fully Convolutional Network (FCN) dilation support --- efficientnet_pytorch/model.py | 19 +++++++++++++------ efficientnet_pytorch/utils.py | 13 ++++++++----- .../original_tf/efficientnet_builder.py | 9 ++++++--- .../original_tf/efficientnet_model.py | 2 +- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/efficientnet_pytorch/model.py b/efficientnet_pytorch/model.py index b1145d9..af4ab30 100644 --- a/efficientnet_pytorch/model.py +++ b/efficientnet_pytorch/model.py @@ -46,9 +46,12 @@ def __init__(self, block_args, global_params): # Depthwise convolution phase k = self._block_args.kernel_size s = self._block_args.stride + d = self._block_args.dilation + if d is None: + d = 1 self._depthwise_conv = Conv2d( in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise - kernel_size=k, stride=s, bias=False) + kernel_size=k, stride=s, bias=False, dilation=d) self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) # Squeeze and Excitation layer, if desired @@ -127,19 +130,23 @@ def __init__(self, blocks_args=None, global_params=None): # Build blocks self._blocks = nn.ModuleList([]) - for block_args in self._blocks_args: + dilate_idx = len(self._blocks_args) - self._global_params.num_dilation + for i, block_args in enumerate(self._blocks_args): + dilate = i >= dilate_idx # Update block input and output filters based on depth multiplier. block_args = block_args._replace( input_filters=round_filters(block_args.input_filters, self._global_params), output_filters=round_filters(block_args.output_filters, self._global_params), - num_repeat=round_repeats(block_args.num_repeat, self._global_params) + num_repeat=round_repeats(block_args.num_repeat, self._global_params), + stride=[1, 1] if dilate else block_args.stride, + dilation=[2, 2] if dilate else block_args.dilation ) # The first block needs to take care of stride and filter size increase. self._blocks.append(MBConvBlock(block_args, self._global_params)) if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1, dilation=1) for _ in range(block_args.num_repeat - 1): self._blocks.append(MBConvBlock(block_args, self._global_params)) @@ -191,8 +198,8 @@ def from_name(cls, model_name, override_params=None): return EfficientNet(blocks_args, global_params) @classmethod - def from_pretrained(cls, model_name, num_classes=1000): - model = EfficientNet.from_name(model_name, override_params={'num_classes': num_classes}) + def from_pretrained(cls, model_name, num_classes=1000, num_dilation=0): + model = EfficientNet.from_name(model_name, override_params={'num_classes': num_classes, 'num_dilation': num_dilation}) load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) return model diff --git a/efficientnet_pytorch/utils.py b/efficientnet_pytorch/utils.py index acdfb77..228cc5e 100644 --- a/efficientnet_pytorch/utils.py +++ b/efficientnet_pytorch/utils.py @@ -22,13 +22,13 @@ GlobalParams = collections.namedtuple('GlobalParams', [ 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'num_classes', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) + 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size', 'num_dilation']) # Parameters for an individual model block BlockArgs = collections.namedtuple('BlockArgs', [ 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', - 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + 'expand_ratio', 'id_skip', 'stride', 'se_ratio', 'dilation']) # Change namedtuple defaults @@ -184,7 +184,8 @@ def _decode_block_string(block_string): expand_ratio=int(options['e']), id_skip=('noskip' not in block_string), se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + stride=[int(options['s'][0])], + dilation=[int(options['d'][0]), int(options['d'][1])] if 'd' in options else [1, 1]) @staticmethod def _encode_block_string(block): @@ -195,7 +196,8 @@ def _encode_block_string(block): 's%d%d' % (block.strides[0], block.strides[1]), 'e%s' % block.expand_ratio, 'i%d' % block.input_filters, - 'o%d' % block.output_filters + 'o%d' % block.output_filters, + 'd%d%d' % (block.dilation[0], block.dilation[1]), ] if 0 < block.se_ratio <= 1: args.append('se%s' % block.se_ratio) @@ -232,7 +234,7 @@ def encode(blocks_args): def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, - drop_connect_rate=0.2, image_size=None, num_classes=1000): + drop_connect_rate=0.2, image_size=None, num_classes=1000, num_dilation=0): """ Creates a efficientnet model. """ blocks_args = [ @@ -255,6 +257,7 @@ def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0. depth_divisor=8, min_depth=None, image_size=image_size, + num_dilation=num_dilation ) return blocks_args, global_params diff --git a/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_builder.py b/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_builder.py index 1b80bbe..ad9a863 100644 --- a/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_builder.py +++ b/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_builder.py @@ -66,7 +66,8 @@ def _decode_block_string(self, block_string): expand_ratio=int(options['e']), id_skip=('noskip' not in block_string), se_ratio=float(options['se']) if 'se' in options else None, - strides=[int(options['s'][0]), int(options['s'][1])]) + strides=[int(options['s'][0]), int(options['s'][1])], + dilation=[int(options['d'][0]), int(options['d'][1])] if 'd' in options else [1, 1]) def _encode_block_string(self, block): """Encodes a block to a string.""" @@ -76,7 +77,8 @@ def _encode_block_string(self, block): 's%d%d' % (block.strides[0], block.strides[1]), 'e%s' % block.expand_ratio, 'i%d' % block.input_filters, - 'o%d' % block.output_filters + 'o%d' % block.output_filters, + 'd%d%d' % (block.dilation[0], block.dilation[1]), ] if block.se_ratio > 0 and block.se_ratio <= 1: args.append('se%s' % block.se_ratio) @@ -134,7 +136,8 @@ def efficientnet(width_coefficient=None, width_coefficient=width_coefficient, depth_coefficient=depth_coefficient, depth_divisor=8, - min_depth=None) + min_depth=None, + num_dilation=0) decoder = BlockDecoder() return decoder.decode(blocks_args), global_params diff --git a/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_model.py b/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_model.py index 2b312d3..bdcdf00 100644 --- a/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_model.py +++ b/tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_model.py @@ -36,7 +36,7 @@ GlobalParams = collections.namedtuple('GlobalParams', [ 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'data_format', 'num_classes', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'min_depth', 'drop_connect_rate', + 'depth_divisor', 'min_depth', 'drop_connect_rate', 'num_dilation' ]) GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) From d5d050d6fdbd8e46a6a1de25572d24645938bcf4 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Mon, 12 Aug 2019 20:02:39 -0400 Subject: [PATCH 2/2] efficientnet_pytorch/model.py dilate stride 2 layers only --- efficientnet_pytorch/model.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/efficientnet_pytorch/model.py b/efficientnet_pytorch/model.py index af4ab30..18ed424 100644 --- a/efficientnet_pytorch/model.py +++ b/efficientnet_pytorch/model.py @@ -128,12 +128,22 @@ def __init__(self, blocks_args=None, global_params=None): self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + dilate_count = 0 + dilations = [] + # determine blocks to dilate from last to first + for block_args in reversed(self._blocks_args): + if (block_args.stride == [2] or block_args.stride == [2, 2]) and dilate_count < self._global_params.num_dilation: + dilations += [True] + dilate_count += 1 + else: + dilations += [False] + # Organize from first to last + dilations.reverse() + # Build blocks self._blocks = nn.ModuleList([]) - dilate_idx = len(self._blocks_args) - self._global_params.num_dilation - for i, block_args in enumerate(self._blocks_args): - - dilate = i >= dilate_idx + for block_args, dilate in zip(self._blocks_args, dilations): + # Update block input and output filters based on depth multiplier. block_args = block_args._replace( input_filters=round_filters(block_args.input_filters, self._global_params),