Commit acfd85a

All swin models support spatial output, add output_fmt to v1/v2 and use ClassifierHead.
* update ClassifierHead to allow different input format
* add output format support to patch embed
* fix some flatten issues for a few conv head models
* add Format enum and helpers for tensor format (layout) choices
1 parent c30a160 commit acfd85a
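A quick sketch of what the new input_fmt plumbing enables (a usage example, not part of the commit; it assumes a timm build that includes these changes): the pooling and classifier head can consume channels-last (NHWC) feature maps directly, as produced by Swin-style stages.

```python
import torch
from timm.layers import ClassifierHead, SelectAdaptivePool2d

# NHWC feature map, e.g. from a Swin stage: (batch, H, W, channels)
feats = torch.randn(2, 7, 7, 768)

# global average pool that reduces dims (1, 2) instead of (2, 3)
pool = SelectAdaptivePool2d(pool_type='avg', flatten=True, input_fmt='NHWC')
print(pool(feats).shape)   # torch.Size([2, 768])

# full head: pool + dropout + linear classifier, fed NHWC features
head = ClassifierHead(in_features=768, num_classes=1000, pool_type='avg', input_fmt='NHWC')
print(head(feats).shape)   # torch.Size([2, 1000])
```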

20 files changed (+1419, -1000 lines)

tests/test_models.py

Lines changed: 162 additions & 151 deletions
Large diffs are not rendered by default.

timm/layers/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
     EvoNorm2dS0, EvoNorm2dS0a, EvoNorm2dS1, EvoNorm2dS1a, EvoNorm2dS2, EvoNorm2dS2a
 from .fast_norm import is_fast_norm, set_fast_norm, fast_group_norm, fast_layer_norm
 from .filter_response_norm import FilterResponseNormTlu2d, FilterResponseNormAct2d
+from .format import Format, get_channel_dim, get_spatial_dim, nchw_to, nhwc_to
 from .gather_excite import GatherExcite
 from .global_context import GlobalContext
 from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible, extend_tuple

timm/layers/adaptive_avgmax_pool.py

Lines changed: 85 additions & 25 deletions
@@ -9,31 +9,37 @@
 
 Hacked together by / Copyright 2020 Ross Wightman
 """
+from typing import Optional, Tuple, Union
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
+from .format import get_spatial_dim, get_channel_dim
+
+_int_tuple_2_t = Union[int, Tuple[int, int]]
+
 
 def adaptive_pool_feat_mult(pool_type='avg'):
-    if pool_type == 'catavgmax':
+    if pool_type.endswith('catavgmax'):
         return 2
     else:
         return 1
 
 
-def adaptive_avgmax_pool2d(x, output_size=1):
+def adaptive_avgmax_pool2d(x, output_size: _int_tuple_2_t = 1):
     x_avg = F.adaptive_avg_pool2d(x, output_size)
     x_max = F.adaptive_max_pool2d(x, output_size)
     return 0.5 * (x_avg + x_max)
 
 
-def adaptive_catavgmax_pool2d(x, output_size=1):
+def adaptive_catavgmax_pool2d(x, output_size: _int_tuple_2_t = 1):
     x_avg = F.adaptive_avg_pool2d(x, output_size)
     x_max = F.adaptive_max_pool2d(x, output_size)
     return torch.cat((x_avg, x_max), 1)
 
 
-def select_adaptive_pool2d(x, pool_type='avg', output_size=1):
+def select_adaptive_pool2d(x, pool_type='avg', output_size: _int_tuple_2_t = 1):
     """Selectable global pooling function with dynamic input kernel size
     """
     if pool_type == 'avg':
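The endswith() change above matters for feature-width bookkeeping: 'fastcatavgmax' should report the same 2x multiplier as 'catavgmax'. A quick check (a sketch, assuming the module is importable as timm.layers.adaptive_avgmax_pool):

```python
from timm.layers.adaptive_avgmax_pool import adaptive_pool_feat_mult

# concatenated avg+max doubles the pooled feature dim, fast variant included
assert adaptive_pool_feat_mult('catavgmax') == 2
assert adaptive_pool_feat_mult('fastcatavgmax') == 2
assert adaptive_pool_feat_mult('avg') == 1
```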
@@ -49,17 +55,56 @@ def select_adaptive_pool2d(x, pool_type='avg', output_size=1):
     return x
 
 
-class FastAdaptiveAvgPool2d(nn.Module):
-    def __init__(self, flatten=False):
-        super(FastAdaptiveAvgPool2d, self).__init__()
+class FastAdaptiveAvgPool(nn.Module):
+    def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
+        super(FastAdaptiveAvgPool, self).__init__()
+        self.flatten = flatten
+        self.dim = get_spatial_dim(input_fmt)
+
+    def forward(self, x):
+        return x.mean(self.dim, keepdim=not self.flatten)
+
+
+class FastAdaptiveMaxPool(nn.Module):
+    def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
+        super(FastAdaptiveMaxPool, self).__init__()
         self.flatten = flatten
+        self.dim = get_spatial_dim(input_fmt)
+
+    def forward(self, x):
+        return x.amax(self.dim, keepdim=not self.flatten)
+
+
+class FastAdaptiveAvgMaxPool(nn.Module):
+    def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
+        super(FastAdaptiveAvgMaxPool, self).__init__()
+        self.flatten = flatten
+        self.dim = get_spatial_dim(input_fmt)
+
+    def forward(self, x):
+        x_avg = x.mean(self.dim, keepdim=not self.flatten)
+        x_max = x.amax(self.dim, keepdim=not self.flatten)
+        return 0.5 * x_avg + 0.5 * x_max
+
+
+class FastAdaptiveCatAvgMaxPool(nn.Module):
+    def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
+        super(FastAdaptiveCatAvgMaxPool, self).__init__()
+        self.flatten = flatten
+        self.dim_reduce = get_spatial_dim(input_fmt)
+        if flatten:
+            self.dim_cat = 1
+        else:
+            self.dim_cat = get_channel_dim(input_fmt)
 
     def forward(self, x):
-        return x.mean((2, 3), keepdim=not self.flatten)
+        x_avg = x.mean(self.dim_reduce, keepdim=not self.flatten)
+        x_max = x.amax(self.dim_reduce, keepdim=not self.flatten)
+        return torch.cat((x_avg, x_max), self.dim_cat)
 
 
 class AdaptiveAvgMaxPool2d(nn.Module):
-    def __init__(self, output_size=1):
+    def __init__(self, output_size: _int_tuple_2_t = 1):
         super(AdaptiveAvgMaxPool2d, self).__init__()
         self.output_size = output_size
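To illustrate the format-aware reductions above (a sketch, not part of the commit): the same module reduces dims (2, 3) for NCHW and (1, 2) for NHWC, and the cat variant concatenates on the layout's channel dim when spatial dims are kept.

```python
import torch
from timm.layers.adaptive_avgmax_pool import FastAdaptiveAvgPool, FastAdaptiveCatAvgMaxPool

nchw = torch.randn(2, 64, 7, 7)
nhwc = nchw.permute(0, 2, 3, 1)

# same pooling, different reduction dims depending on the declared layout
print(FastAdaptiveAvgPool(flatten=True, input_fmt='NCHW')(nchw).shape)  # torch.Size([2, 64])
print(FastAdaptiveAvgPool(flatten=True, input_fmt='NHWC')(nhwc).shape)  # torch.Size([2, 64])

# without flatten, concat happens on the channel dim of the input format
cat = FastAdaptiveCatAvgMaxPool(flatten=False, input_fmt='NHWC')
print(cat(nhwc).shape)  # torch.Size([2, 1, 1, 128])
```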

@@ -68,7 +113,7 @@ def forward(self, x):
 
 
 class AdaptiveCatAvgMaxPool2d(nn.Module):
-    def __init__(self, output_size=1):
+    def __init__(self, output_size: _int_tuple_2_t = 1):
         super(AdaptiveCatAvgMaxPool2d, self).__init__()
         self.output_size = output_size

@@ -79,26 +124,41 @@ def forward(self, x):
 class SelectAdaptivePool2d(nn.Module):
     """Selectable global pooling layer with dynamic input kernel size
     """
-    def __init__(self, output_size=1, pool_type='fast', flatten=False):
+    def __init__(
+            self,
+            output_size: _int_tuple_2_t = 1,
+            pool_type: str = 'fast',
+            flatten: bool = False,
+            input_fmt: str = 'NCHW',
+    ):
         super(SelectAdaptivePool2d, self).__init__()
+        assert input_fmt in ('NCHW', 'NHWC')
         self.pool_type = pool_type or ''  # convert other falsy values to empty string for consistent TS typing
-        self.flatten = nn.Flatten(1) if flatten else nn.Identity()
-        if pool_type == '':
+        if not pool_type:
             self.pool = nn.Identity()  # pass through
-        elif pool_type == 'fast':
-            assert output_size == 1
-            self.pool = FastAdaptiveAvgPool2d(flatten)
+            self.flatten = nn.Flatten(1) if flatten else nn.Identity()
+        elif pool_type.startswith('fast') or input_fmt != 'NCHW':
+            assert output_size == 1, 'Fast pooling and non NCHW input formats require output_size == 1.'
+            if pool_type.endswith('catavgmax'):
+                self.pool = FastAdaptiveCatAvgMaxPool(flatten, input_fmt=input_fmt)
+            elif pool_type.endswith('avgmax'):
+                self.pool = FastAdaptiveAvgMaxPool(flatten, input_fmt=input_fmt)
+            elif pool_type.endswith('max'):
+                self.pool = FastAdaptiveMaxPool(flatten, input_fmt=input_fmt)
+            else:
+                self.pool = FastAdaptiveAvgPool(flatten, input_fmt=input_fmt)
             self.flatten = nn.Identity()
-        elif pool_type == 'avg':
-            self.pool = nn.AdaptiveAvgPool2d(output_size)
-        elif pool_type == 'avgmax':
-            self.pool = AdaptiveAvgMaxPool2d(output_size)
-        elif pool_type == 'catavgmax':
-            self.pool = AdaptiveCatAvgMaxPool2d(output_size)
-        elif pool_type == 'max':
-            self.pool = nn.AdaptiveMaxPool2d(output_size)
         else:
-            assert False, 'Invalid pool type: %s' % pool_type
+            assert input_fmt == 'NCHW'
+            if pool_type == 'avgmax':
+                self.pool = AdaptiveAvgMaxPool2d(output_size)
+            elif pool_type == 'catavgmax':
+                self.pool = AdaptiveCatAvgMaxPool2d(output_size)
+            elif pool_type == 'max':
+                self.pool = nn.AdaptiveMaxPool2d(output_size)
+            else:
+                self.pool = nn.AdaptiveAvgPool2d(output_size)
+            self.flatten = nn.Flatten(1) if flatten else nn.Identity()
 
     def is_identity(self):
         return not self.pool_type
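A sketch of the resulting dispatch in SelectAdaptivePool2d (usage assumes a timm build with these changes): 'fast*' pool types and any non-NCHW input format route to the tensor-reduction pools and require output_size == 1, while plain pool types on NCHW still use the nn.Adaptive*Pool2d modules.

```python
import torch
from timm.layers import SelectAdaptivePool2d

x = torch.randn(2, 64, 7, 7)

# classic NCHW path keeps spatial output_size support
pool = SelectAdaptivePool2d(output_size=2, pool_type='avg')
print(pool(x).shape)   # torch.Size([2, 64, 2, 2])

# 'fast' types reduce straight to a vector (output_size must stay 1)
fast = SelectAdaptivePool2d(pool_type='fastavgmax', flatten=True)
print(fast(x).shape)   # torch.Size([2, 64])

# non-NCHW layouts always take the fast path
nhwc_pool = SelectAdaptivePool2d(pool_type='max', flatten=True, input_fmt='NHWC')
print(nhwc_pool(x.permute(0, 2, 3, 1)).shape)   # torch.Size([2, 64])
```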

timm/layers/classifier.py

Lines changed: 60 additions & 18 deletions
@@ -15,13 +15,23 @@
 from .create_norm import get_norm_layer
 
 
-def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False):
+def _create_pool(
+        num_features: int,
+        num_classes: int,
+        pool_type: str = 'avg',
+        use_conv: bool = False,
+        input_fmt: Optional[str] = None,
+):
     flatten_in_pool = not use_conv  # flatten when we use a Linear layer after pooling
     if not pool_type:
         assert num_classes == 0 or use_conv,\
             'Pooling can only be disabled if classifier is also removed or conv classifier is used'
         flatten_in_pool = False  # disable flattening if pooling is pass-through (no pooling)
-    global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool)
+    global_pool = SelectAdaptivePool2d(
+        pool_type=pool_type,
+        flatten=flatten_in_pool,
+        input_fmt=input_fmt,
+    )
     num_pooled_features = num_features * global_pool.feat_mult()
     return global_pool, num_pooled_features

@@ -36,9 +46,25 @@ def _create_fc(num_features, num_classes, use_conv=False):
     return fc
 
 
-def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False):
-    global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv)
-    fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
+def create_classifier(
+        num_features: int,
+        num_classes: int,
+        pool_type: str = 'avg',
+        use_conv: bool = False,
+        input_fmt: str = 'NCHW',
+):
+    global_pool, num_pooled_features = _create_pool(
+        num_features,
+        num_classes,
+        pool_type,
+        use_conv=use_conv,
+        input_fmt=input_fmt,
+    )
+    fc = _create_fc(
+        num_pooled_features,
+        num_classes,
+        use_conv=use_conv,
+    )
     return global_pool, fc
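For models that assemble their own heads, create_classifier now threads input_fmt through to the pooling layer. A minimal sketch (assuming the module path timm.layers.classifier shown above):

```python
import torch
from timm.layers.classifier import create_classifier

global_pool, fc = create_classifier(768, 1000, pool_type='avg', input_fmt='NHWC')

feats = torch.randn(2, 7, 7, 768)   # channels-last features
logits = fc(global_pool(feats))     # pool -> (2, 768), linear -> (2, 1000)
print(logits.shape)
```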

@@ -52,6 +78,7 @@ def __init__(
             pool_type: str = 'avg',
             drop_rate: float = 0.,
             use_conv: bool = False,
+            input_fmt: str = 'NCHW',
     ):
         """
         Args:
@@ -64,28 +91,43 @@ def __init__(
         self.drop_rate = drop_rate
         self.in_features = in_features
         self.use_conv = use_conv
-
-        self.global_pool, num_pooled_features = _create_pool(in_features, num_classes, pool_type, use_conv=use_conv)
-        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
+        self.input_fmt = input_fmt
+
+        self.global_pool, self.fc = create_classifier(
+            in_features,
+            num_classes,
+            pool_type,
+            use_conv=use_conv,
+            input_fmt=input_fmt,
+        )
         self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity()
 
-    def reset(self, num_classes, global_pool=None):
-        if global_pool is not None:
-            if global_pool != self.global_pool.pool_type:
-                self.global_pool, _ = _create_pool(self.in_features, num_classes, global_pool, use_conv=self.use_conv)
-            self.flatten = nn.Flatten(1) if self.use_conv and global_pool else nn.Identity()
-        num_pooled_features = self.in_features * self.global_pool.feat_mult()
-        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=self.use_conv)
+    def reset(self, num_classes, pool_type=None):
+        if pool_type is not None and pool_type != self.global_pool.pool_type:
+            self.global_pool, self.fc = create_classifier(
+                self.in_features,
+                num_classes,
+                pool_type=pool_type,
+                use_conv=self.use_conv,
+                input_fmt=self.input_fmt,
+            )
+            self.flatten = nn.Flatten(1) if self.use_conv and pool_type else nn.Identity()
+        else:
+            num_pooled_features = self.in_features * self.global_pool.feat_mult()
+            self.fc = _create_fc(
+                num_pooled_features,
+                num_classes,
+                use_conv=self.use_conv,
+            )
 
     def forward(self, x, pre_logits: bool = False):
         x = self.global_pool(x)
         if self.drop_rate:
             x = F.dropout(x, p=float(self.drop_rate), training=self.training)
         if pre_logits:
-            return x.flatten(1)
-        else:
-            x = self.fc(x)
             return self.flatten(x)
+        x = self.fc(x)
+        return self.flatten(x)
 
 
 class NormMlpClassifierHead(nn.Module):
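The reset() rework keeps input_fmt sticky on the head: a pool-type change rebuilds both pool and fc with the stored format, while a classes-only reset just swaps the fc. A sketch of that behaviour (assuming a timm build with these changes):

```python
import torch
from timm.layers import ClassifierHead

head = ClassifierHead(in_features=512, num_classes=1000, pool_type='avg', input_fmt='NHWC')
x = torch.randn(2, 7, 7, 512)
print(head(x).shape)   # torch.Size([2, 1000])

head.reset(10)         # classifier only; pooling (and the NHWC handling) is kept
print(head(x).shape)   # torch.Size([2, 10])

head.reset(10, pool_type='avgmax')   # pool change rebuilds pool and fc, still NHWC-aware
print(head(x).shape)   # torch.Size([2, 10])
```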

timm/layers/format.py

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+from enum import Enum
+from typing import Union
+
+import torch
+
+
+class Format(str, Enum):
+    NCHW = 'NCHW'
+    NHWC = 'NHWC'
+    NCL = 'NCL'
+    NLC = 'NLC'
+
+
+FormatT = Union[str, Format]
+
+
+def get_spatial_dim(fmt: FormatT):
+    fmt = Format(fmt)
+    if fmt is Format.NLC:
+        dim = (1,)
+    elif fmt is Format.NCL:
+        dim = (2,)
+    elif fmt is Format.NHWC:
+        dim = (1, 2)
+    else:
+        dim = (2, 3)
+    return dim
+
+
+def get_channel_dim(fmt: FormatT):
+    fmt = Format(fmt)
+    if fmt is Format.NHWC:
+        dim = 3
+    elif fmt is Format.NLC:
+        dim = 2
+    else:
+        dim = 1
+    return dim
+
+
+def nchw_to(x: torch.Tensor, fmt: Format):
+    if fmt == Format.NHWC:
+        x = x.permute(0, 2, 3, 1)
+    elif fmt == Format.NLC:
+        x = x.flatten(2).transpose(1, 2)
+    elif fmt == Format.NCL:
+        x = x.flatten(2)
+    return x
+
+
+def nhwc_to(x: torch.Tensor, fmt: Format):
+    if fmt == Format.NCHW:
+        x = x.permute(0, 3, 1, 2)
+    elif fmt == Format.NLC:
+        x = x.flatten(1, 2)
+    elif fmt == Format.NCL:
+        x = x.flatten(1, 2).transpose(1, 2)
+    return x
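A short sketch of the new helpers in use (names exactly as added above; since Format is a str Enum, either the enum member or its string value can be passed):

```python
import torch
from timm.layers import Format, get_spatial_dim, nchw_to, nhwc_to

x = torch.randn(2, 64, 7, 7)   # NCHW

print(nchw_to(x, Format.NHWC).shape)   # torch.Size([2, 7, 7, 64])
print(nchw_to(x, Format.NLC).shape)    # torch.Size([2, 49, 64])
print(nhwc_to(x.permute(0, 2, 3, 1), Format.NCL).shape)   # torch.Size([2, 64, 49])

# reduction dims used by the format-aware global pooling
print(get_spatial_dim('NHWC'))        # (1, 2)
print(get_spatial_dim(Format.NCHW))   # (2, 3)
```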

0 commit comments
