
Commit 91c3318

Merge pull request #96 from ayasyrev/refactor_modules_cfg
Refactor modules cfg
2 parents e3d713d + 7d6ffe4 commit 91c3318

14 files changed, +362 -331 lines changed


.flake8

Lines changed: 6 additions & 3 deletions
@@ -2,6 +2,9 @@
 select = C,E,F,W
 max-complexity = 10
 max-line-length = 120
-
-application-import-names = model_constructor
-import-order-style = google
+disable-noqa = True
+application-import-names = model_constructor, tests
+import-order-style = google
+per-file-ignores =
+    # imported but unused
+    __init__.py: F401
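
F401 is the pyflakes "imported but unused" check, so the new per-file-ignores entry silences that single code only inside __init__.py, where imports exist purely as re-exports. A minimal hypothetical sketch of the pattern this allows (not part of this commit):

# __init__.py of some package: re-export only, never used in this file.
# Plain flake8 reports F401 here; with "per-file-ignores = __init__.py: F401"
# it passes without a per-line "# noqa" comment.
from .model_constructor import ModelConstructor

The src/model_constructor/__init__.py diff below drops its "# noqa F401" comments for exactly this reason.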

src/model_constructor/__init__.py

Lines changed: 3 additions & 7 deletions
@@ -1,7 +1,3 @@
-from model_constructor.convmixer import ConvMixer # noqa F401
-from model_constructor.model_constructor import (
-    ModelConstructor,
-    ModelCfg,
-) # noqa F401
-
-from model_constructor.version import __version__ # noqa F401
+from .convmixer import ConvMixer
+from .model_constructor import ModelConstructor, ModelCfg
+from .version import __version__
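
The same names stay importable from the package root after the cleanup. A quick check, assuming the package is installed (for example with pip install -e .):

from model_constructor import ModelCfg, ModelConstructor, __version__

print(__version__)                  # version string re-exported from .version
print(ModelConstructor, ModelCfg)   # classes re-exported from .model_constructor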

src/model_constructor/activations.py

Lines changed: 33 additions & 12 deletions
@@ -5,9 +5,24 @@
 from torch.nn import Mish


-__all__ = ['mish', 'Mish', 'mish_jit', 'MishJit', 'mish_jit_fwd', 'mish_jit_bwd', 'MishJitAutoFn', 'mish_me', 'MishMe',
-           'hard_mish_jit', 'HardMishJit', 'hard_mish_jit_fwd', 'hard_mish_jit_bwd', 'HardMishJitAutoFn',
-           'hard_mish_me', 'HardMishMe']
+__all__ = [
+    "mish",
+    "Mish",
+    "mish_jit",
+    "MishJit",
+    "mish_jit_fwd",
+    "mish_jit_bwd",
+    "MishJitAutoFn",
+    "mish_me",
+    "MishMe",
+    "hard_mish_jit",
+    "HardMishJit",
+    "hard_mish_jit_fwd",
+    "hard_mish_jit_bwd",
+    "HardMishJitAutoFn",
+    "hard_mish_me",
+    "HardMishMe",
+]


 def mish(x, inplace: bool = False):
@@ -40,7 +55,8 @@ def mish_jit(x, _inplace: bool = False):
 class MishJit(nn.Module):
     def __init__(self, inplace: bool = False):
         """Jit version of Mish.
-        Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681"""
+        Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+        """
         super(MishJit, self).__init__()

     def forward(self, x):
@@ -61,8 +77,9 @@ def mish_jit_bwd(x, grad_output):


 class MishJitAutoFn(torch.autograd.Function):
-    """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
     A memory efficient, jit scripted variant of Mish"""
+
     @staticmethod
     def forward(ctx, x):
         ctx.save_for_backward(x)
@@ -79,8 +96,9 @@ def mish_me(x, inplace=False):


 class MishMe(nn.Module):
-    """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
     A memory efficient, jit scripted variant of Mish"""
+
     def __init__(self, inplace: bool = False):
         super(MishMe, self).__init__()

@@ -90,18 +108,19 @@ def forward(self, x):

 @torch.jit.script
 def hard_mish_jit(x, inplace: bool = False):
-    """ Hard Mish
+    """Hard Mish
     Experimental, based on notes by Mish author Diganta Misra at
     https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
     """
     return 0.5 * x * (x + 2).clamp(min=0, max=2)


 class HardMishJit(nn.Module):
-    """ Hard Mish
+    """Hard Mish
     Experimental, based on notes by Mish author Diganta Misra at
     https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
     """
+
     def __init__(self, inplace: bool = False):
         super(HardMishJit, self).__init__()

@@ -116,16 +135,17 @@ def hard_mish_jit_fwd(x):

 @torch.jit.script
 def hard_mish_jit_bwd(x, grad_output):
-    m = torch.ones_like(x) * (x >= -2.)
-    m = torch.where((x >= -2.) & (x <= 0.), x + 1., m)
+    m = torch.ones_like(x) * (x >= -2.0)
+    m = torch.where((x >= -2.0) & (x <= 0.0), x + 1.0, m)
     return grad_output * m


 class HardMishJitAutoFn(torch.autograd.Function):
-    """ A memory efficient, jit scripted variant of Hard Mish
+    """A memory efficient, jit scripted variant of Hard Mish
     Experimental, based on notes by Mish author Diganta Misra at
     https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
     """
+
     @staticmethod
     def forward(ctx, x):
         ctx.save_for_backward(x)
@@ -142,10 +162,11 @@ def hard_mish_me(x, inplace: bool = False):


 class HardMishMe(nn.Module):
-    """ A memory efficient, jit scripted variant of Hard Mish
+    """A memory efficient, jit scripted variant of Hard Mish
     Experimental, based on notes by Mish author Diganta Misra at
     https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
     """
+
     def __init__(self, inplace: bool = False):
         super(HardMishMe, self).__init__()

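
The edits in this file are cosmetic (a reformatted __all__, docstring spacing, explicit float literals such as -2.0), so runtime behaviour should be unchanged. A small sanity-check sketch, assuming torch is available and the import path model_constructor.activations:

import torch

from model_constructor.activations import MishJit, hard_mish_jit

x = torch.randn(2, 8)
y = MishJit()(x)          # module wrapper around the jit-friendly Mish
z = hard_mish_jit(x)      # scripted Hard Mish: 0.5 * x * (x + 2).clamp(min=0, max=2)
print(y.shape, z.shape)   # both are elementwise, so shapes match the input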

src/model_constructor/base_constructor.py

Lines changed: 0 additions & 1 deletion
@@ -3,7 +3,6 @@
 # Used in examples.
 # first implementation of xresnet - inspired by fastai version.
 from collections import OrderedDict
-from functools import partial

 import torch.nn as nn


src/model_constructor/blocks.py

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+from typing import Callable, Union
+
+import torch
+from torch import nn
+
+from .helpers import ListStrMod, nn_seq
+from .layers import ConvBnAct, get_act
+
+
+class BasicBlock(nn.Module):
+    """Basic Resnet block.
+    Configurable - can use pool to reduce at identity path, change act etc."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 1,
+        conv_layer: type[ConvBnAct] = ConvBnAct,
+        act_fn: type[nn.Module] = nn.ReLU,
+        zero_bn: bool = True,
+        bn_1st: bool = True,
+        groups: int = 1,
+        dw: bool = False,
+        div_groups: Union[None, int] = None,
+        pool: Union[Callable[[], nn.Module], None] = None,
+        se: Union[nn.Module, None] = None,
+        sa: Union[nn.Module, None] = None,
+    ):
+        super().__init__()
+        # pool defined at ModelConstructor.
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(out_channels / div_groups)
+        layers: ListStrMod = [
+            (
+                "conv_0",
+                conv_layer(
+                    in_channels,
+                    out_channels,
+                    3,
+                    stride=stride,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                    groups=in_channels if dw else groups,
+                ),
+            ),
+            (
+                "conv_1",
+                conv_layer(
+                    out_channels,
+                    out_channels,
+                    3,
+                    zero_bn=zero_bn,
+                    act_fn=False,
+                    bn_1st=bn_1st,
+                    groups=out_channels if dw else groups,
+                ),
+            ),
+        ]
+        if se:
+            layers.append(("se", se(out_channels)))
+        if sa:
+            layers.append(("sa", sa(out_channels)))
+        self.convs = nn_seq(layers)
+        if stride != 1 or in_channels != out_channels:
+            id_layers: ListStrMod = []
+            if (
+                stride != 1 and pool is not None
+            ):  # if pool - reduce by pool else stride 2 art id_conv
+                id_layers.append(("pool", pool()))
+            if in_channels != out_channels or (stride != 1 and pool is None):
+                id_layers.append(
+                    (
+                        "id_conv",
+                        conv_layer(
+                            in_channels,
+                            out_channels,
+                            1,
+                            stride=1 if pool else stride,
+                            act_fn=False,
+                        ),
+                    )
+                )
+            self.id_conv = nn_seq(id_layers)
+        else:
+            self.id_conv = None
+        self.act_fn = get_act(act_fn)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
+        identity = self.id_conv(x) if self.id_conv is not None else x
+        return self.act_fn(self.convs(x) + identity)
+
+
+class BottleneckBlock(nn.Module):
+    """Bottleneck Resnet block.
+    Configurable - can use pool to reduce at identity path, change act etc."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 1,
+        expansion: int = 4,
+        conv_layer: type[ConvBnAct] = ConvBnAct,
+        act_fn: type[nn.Module] = nn.ReLU,
+        zero_bn: bool = True,
+        bn_1st: bool = True,
+        groups: int = 1,
+        dw: bool = False,
+        div_groups: Union[None, int] = None,
+        pool: Union[Callable[[], nn.Module], None] = None,
+        se: Union[nn.Module, None] = None,
+        sa: Union[nn.Module, None] = None,
+    ):
+        super().__init__()
+        # pool defined at ModelConstructor.
+        mid_channels = out_channels // expansion
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(mid_channels / div_groups)
+        layers: ListStrMod = [
+            (
+                "conv_0",
+                conv_layer(
+                    in_channels,
+                    mid_channels,
+                    1,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                ),
+            ),
+            (
+                "conv_1",
+                conv_layer(
+                    mid_channels,
+                    mid_channels,
+                    3,
+                    stride=stride,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                    groups=mid_channels if dw else groups,
+                ),
+            ),
+            (
+                "conv_2",
+                conv_layer(
+                    mid_channels,
+                    out_channels,
+                    1,
+                    zero_bn=zero_bn,
+                    act_fn=False,
+                    bn_1st=bn_1st,
+                ),
+            ),
+        ]
+        if se:
+            layers.append(("se", se(out_channels)))
+        if sa:
+            layers.append(("sa", sa(out_channels)))
+        self.convs = nn_seq(layers)
+        if stride != 1 or in_channels != out_channels:
+            id_layers: ListStrMod = []
+            if (
+                stride != 1 and pool is not None
+            ):  # if pool - reduce by pool else stride 2 art id_conv
+                id_layers.append(("pool", pool()))
+            if in_channels != out_channels or (stride != 1 and pool is None):
+                id_layers.append(
+                    (
+                        "id_conv",
+                        conv_layer(
+                            in_channels,
+                            out_channels,
+                            1,
+                            stride=1 if pool else stride,
+                            act_fn=False,
+                        ),
+                    )
+                )
+            self.id_conv = nn_seq(id_layers)
+        else:
+            self.id_conv = None
+        self.act_fn = get_act(act_fn)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
+        identity = self.id_conv(x) if self.id_conv is not None else x
+        return self.act_fn(self.convs(x) + identity)
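
A usage sketch for the two new blocks, based only on the constructor signatures above. The shape comments assume ConvBnAct pads its 3x3 convs to preserve spatial size (the usual convention, not shown in this diff):

import torch

from model_constructor.blocks import BasicBlock, BottleneckBlock

x = torch.randn(1, 64, 32, 32)

# Two 3x3 convs; id_conv is added because channels and stride both change.
basic = BasicBlock(in_channels=64, out_channels=128, stride=2)
print(basic(x).shape)        # expected: torch.Size([1, 128, 16, 16])

# 1x1 -> 3x3 -> 1x1 with mid_channels = out_channels // expansion = 64.
bottleneck = BottleneckBlock(in_channels=64, out_channels=256, expansion=4)
print(bottleneck(x).shape)   # expected: torch.Size([1, 256, 32, 32])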

src/model_constructor/convmixer.py

Lines changed: 0 additions & 1 deletion
@@ -67,7 +67,6 @@ def __init__(
         bn_1st: bool = False,
         pre_act: bool = False,
     ):
-
         conv_layer: List[tuple[str, nn.Module]] = [
             (
                 "conv",
