Merge pull request #99 from ayasyrev/refactor_modelcfg

ayasyrev · web-flow · commit 8bfcd0831999 · 2023-07-05T11:08:58.000+03:00
Refactor modelcfg
diff --git a/noxfile_conda.py b/noxfile_conda.py
@@ -4,7 +4,6 @@
 @nox.session(python=["3.9", "3.10", "3.11"], venv_backend="mamba")
 def conda_tests(session: nox.Session) -> None:
     args = session.posargs or ["--cov"]
-    # session.install("pytest", "pytest-cov")
     session.conda_install("pytest", "pytest-cov")
     session.conda_install("pytorch")
     session.conda_install("pydantic")
diff --git a/src/model_constructor/blocks.py b/src/model_constructor/blocks.py
@@ -1,4 +1,4 @@
-from typing import Callable, Union
+from typing import Callable, Optional
 
 import torch
 from torch import nn
@@ -22,10 +22,10 @@ def __init__(
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
-        div_groups: Union[None, int] = None,
-        pool: Union[Callable[[], nn.Module], None] = None,
-        se: Union[nn.Module, None] = None,
-        sa: Union[nn.Module, None] = None,
+        div_groups: Optional[int] = None,
+        pool: Optional[Callable[[], nn.Module]] = None,
+        se: Optional[nn.Module] = None,
+        sa: Optional[nn.Module] = None,
     ):
         super().__init__()
         # pool defined at ModelConstructor.
@@ -107,10 +107,10 @@ def __init__(
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
-        div_groups: Union[None, int] = None,
-        pool: Union[Callable[[], nn.Module], None] = None,
-        se: Union[nn.Module, None] = None,
-        sa: Union[nn.Module, None] = None,
+        div_groups: Optional[int] = None,
+        pool: Optional[Callable[[], nn.Module]] = None,
+        se: Optional[nn.Module] = None,
+        sa: Optional[nn.Module] = None,
     ):
         super().__init__()
         # pool defined at ModelConstructor.
diff --git a/src/model_constructor/convmixer.py b/src/model_constructor/convmixer.py
@@ -5,16 +5,16 @@
 from collections import OrderedDict
 from typing import Callable, List, Optional, Union
 
+import torch
 import torch.nn as nn
-from torch import TensorType
 
 
 class Residual(nn.Module):
-    def __init__(self, fn: Callable[[TensorType], TensorType]):
+    def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]):
         super().__init__()
         self.fn = fn
 
-    def forward(self, x: TensorType) -> TensorType:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.fn(x) + x
 
 
diff --git a/src/model_constructor/helpers.py b/src/model_constructor/helpers.py
@@ -1,12 +1,13 @@
 from collections import OrderedDict
 from functools import partial
-from typing import Iterable, Optional
+from typing import Iterable, Optional, Union
 from pydantic import BaseModel
 
 from torch import nn
 
 
 ListStrMod = list[tuple[str, nn.Module]]
+ModSeq = Union[nn.Module, nn.Sequential]
 
 
 def nn_seq(list_of_tuples: Iterable[tuple[str, nn.Module]]) -> nn.Sequential:
diff --git a/src/model_constructor/layers.py b/src/model_constructor/layers.py
@@ -258,14 +258,14 @@ class SEModule(nn.Module):
 
     def __init__(
         self,
-        channels,
-        reduction=16,
-        rd_channels=None,
-        rd_max=False,
-        se_layer=nn.Linear,
-        act_fn=nn.ReLU(inplace=True),
-        use_bias=True,
-        gate=nn.Sigmoid,
+        channels: int,
+        reduction: int = 16,
+        rd_channels: Optional[int] = None,
+        rd_max: bool = False,
+        se_layer: type[nn.Module] = nn.Linear,
+        act_fn: nn.Module = nn.ReLU(inplace=True),
+        use_bias: bool = True,
+        gate: type[nn.Module] = nn.Sigmoid,
     ):
         super().__init__()
         reducted = max(channels // reduction, 1)  # preserve zero-element tensors
@@ -286,7 +286,7 @@ def __init__(
             )
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         bs, c, _, _ = x.shape
         y = self.squeeze(x).view(bs, c)
         y = self.excitation(y).view(bs, c, 1, 1)
@@ -298,14 +298,14 @@ class SEModuleConv(nn.Module):
 
     def __init__(
         self,
-        channels,
-        reduction=16,
-        rd_channels=None,
-        rd_max=False,
-        se_layer=nn.Conv2d,
-        act_fn=nn.ReLU(inplace=True),
-        use_bias=True,
-        gate=nn.Sigmoid,
+        channels: int,
+        reduction: int = 16,
+        rd_channels: Optional[int] = None,
+        rd_max: bool = False,
+        se_layer: type[nn.Module] = nn.Conv2d,
+        act_fn: nn.Module = nn.ReLU(inplace=True),
+        use_bias: bool = True,
+        gate: type[nn.Module] = nn.Sigmoid,
     ):
         super().__init__()
         #       rd_channels = math.ceil(channels//reduction/8)*8
@@ -327,7 +327,7 @@ def __init__(
             )
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         y = self.squeeze(x)
         y = self.excitation(y)
         return x * y.expand_as(x)
diff --git a/src/model_constructor/model_constructor.py b/src/model_constructor/model_constructor.py
@@ -6,7 +6,7 @@
 from torch import nn
 
 from .blocks import BasicBlock, BottleneckBlock
-from .helpers import Cfg, ListStrMod, init_cnn, nn_seq
+from .helpers import Cfg, ListStrMod, ModSeq, init_cnn, nn_seq
 from .layers import ConvBnAct, SEModule, SimpleSelfAttention
 
 __all__ = [
@@ -33,20 +33,45 @@ class ModelCfg(Cfg, arbitrary_types_allowed=True, extra="forbid"):
     expansion: int = 1
     groups: int = 1
     dw: bool = False
-    div_groups: Union[int, None] = None
+    div_groups: Optional[int] = None
     sa: Union[bool, type[nn.Module]] = False
     se: Union[bool, type[nn.Module]] = False
-    se_module: Union[bool, None] = None
-    se_reduction: Union[int, None] = None
+    se_module: Optional[bool] = None
+    se_reduction: Optional[int] = None
     bn_1st: bool = True
     zero_bn: bool = True
     stem_stride_on: int = 0
     stem_sizes: list[int] = [64]
-    stem_pool: Union[Callable[[], nn.Module], None] = partial(
+    stem_pool: Optional[Callable[[], nn.Module]] = partial(
         nn.MaxPool2d, kernel_size=3, stride=2, padding=1
     )
     stem_bn_end: bool = False
 
+    @field_validator("se")
+    def set_se(  # pylint: disable=no-self-argument
+        cls, value: Union[bool, type[nn.Module]]
+    ) -> Union[bool, type[nn.Module]]:
+        if value:
+            if isinstance(value, (int, bool)):
+                return SEModule
+        return value
+
+    @field_validator("sa")
+    def set_sa(  # pylint: disable=no-self-argument
+        cls, value: Union[bool, type[nn.Module]]
+    ) -> Union[bool, type[nn.Module]]:
+        if value:
+            if isinstance(value, (int, bool)):
+                return SimpleSelfAttention  # default: ks=1, sym=sym
+        return value
+
+    @field_validator("se_module", "se_reduction")  # pragma: no cover
+    def deprecation_warning(  # pylint: disable=no-self-argument
+        cls, value: Union[bool, int, None]
+    ) -> Union[bool, int, None]:
+        print("Deprecated. Pass se_module as se argument, se_reduction as arg to se.")
+        return value
+
     def __repr__(self) -> str:
         se_repr = self.se.__name__ if self.se else "False"  # type: ignore
         model_name = self.name or self.__class__.__name__
@@ -61,7 +86,7 @@ def __repr__(self) -> str:
         )
 
 
-def make_stem(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_stem(cfg: ModelCfg) -> nn.Sequential:
     """Create Resnet stem."""
     stem: ListStrMod = [
         (
@@ -116,15 +141,15 @@ def make_layer(cfg: ModelCfg, layer_num: int) -> nn.Sequential:  # type: ignore
     )
 
 
-def make_body(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_body(cfg: ModelCfg) -> nn.Sequential:
     """Create model body."""
     return nn_seq(
         (f"l_{layer_num}", cfg.make_layer(cfg, layer_num))  # type: ignore
         for layer_num in range(len(cfg.layers))
     )
 
 
-def make_head(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_head(cfg: ModelCfg) -> nn.Sequential:
     """Create head."""
     head = [
         ("pool", nn.AdaptiveAvgPool2d(1)),
@@ -138,35 +163,10 @@ class ModelConstructor(ModelCfg):
     """Model constructor. As default - resnet18"""
 
     init_cnn: Callable[[nn.Module], None] = init_cnn
-    make_stem: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_stem  # type: ignore
-    make_layer: Callable[[ModelCfg, int], Union[nn.Module, nn.Sequential]] = make_layer  # type: ignore
-    make_body: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_body  # type: ignore
-    make_head: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_head  # type: ignore
-
-    @field_validator("se")
-    def set_se(  # pylint: disable=no-self-argument
-        cls, value: Union[bool, type[nn.Module]]
-    ) -> Union[bool, type[nn.Module]]:
-        if value:
-            if isinstance(value, (int, bool)):
-                return SEModule
-        return value
-
-    @field_validator("sa")
-    def set_sa(  # pylint: disable=no-self-argument
-        cls, value: Union[bool, type[nn.Module]]
-    ) -> Union[bool, type[nn.Module]]:
-        if value:
-            if isinstance(value, (int, bool)):
-                return SimpleSelfAttention  # default: ks=1, sym=sym
-        return value
-
-    @field_validator("se_module", "se_reduction")  # pragma: no cover
-    def deprecation_warning(  # pylint: disable=no-self-argument
-        cls, value: Union[bool, int, None]
-    ) -> Union[bool, int, None]:
-        print("Deprecated. Pass se_module as se argument, se_reduction as arg to se.")
-        return value
+    make_stem: Callable[[ModelCfg], ModSeq] = make_stem
+    make_layer: Callable[[ModelCfg, int], ModSeq] = make_layer
+    make_body: Callable[[ModelCfg], ModSeq] = make_body
+    make_head: Callable[[ModelCfg], ModSeq] = make_head
 
     @property
     def stem(self):
@@ -186,7 +186,7 @@ def from_cfg(cls, cfg: ModelCfg):
 
     @classmethod
     def create_model(
-        cls, cfg: Union[ModelCfg, None] = None, **kwargs: dict[str, Any]
+        cls, cfg: Optional[ModelCfg] = None, **kwargs: dict[str, Any]
     ) -> nn.Sequential:
         if cfg:
             return cls(**cfg.model_dump())()
diff --git a/src/model_constructor/universal_blocks.py b/src/model_constructor/universal_blocks.py
@@ -1,9 +1,9 @@
-from typing import Callable, Union
+from typing import Callable, Optional
 
 import torch
 from torch import nn
 
-from .helpers import nn_seq
+from .helpers import ModSeq, nn_seq
 from .layers import ConvBnAct, get_act
 from .model_constructor import ListStrMod, ModelCfg, ModelConstructor
 
@@ -32,10 +32,10 @@ def __init__(
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
-        div_groups: Union[None, int] = None,
-        pool: Union[Callable[[], nn.Module], None] = None,
-        se: Union[nn.Module, None] = None,
-        sa: Union[nn.Module, None] = None,
+        div_groups: Optional[int] = None,
+        pool: Optional[Callable[[], nn.Module]] = None,
+        se: Optional[nn.Module] = None,
+        sa: Optional[nn.Module] = None,
     ):
         super().__init__()
         # pool defined at ModelConstructor.
@@ -134,7 +134,7 @@ def __init__(
             self.id_conv = None
         self.act_fn = get_act(act_fn)
 
-    def forward(self, x: torch.Tensor):  # type: ignore
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         identity = self.id_conv(x) if self.id_conv is not None else x
         return self.act_fn(self.convs(x) + identity)
 
@@ -156,10 +156,10 @@ def __init__(
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
-        div_groups: Union[None, int] = None,
-        pool: Union[Callable[[], nn.Module], None] = None,
-        se: Union[type[nn.Module], None] = None,
-        sa: Union[type[nn.Module], None] = None,
+        div_groups: Optional[int] = None,
+        pool: Optional[Callable[[], nn.Module]] = None,
+        se: Optional[type[nn.Module]] = None,
+        sa: Optional[type[nn.Module]] = None,
     ):
         super().__init__()
         # pool defined at ModelConstructor.
@@ -255,14 +255,14 @@ def __init__(
             self.id_conv = None
         self.merge = get_act(act_fn)
 
-    def forward(self, x: torch.Tensor):  # type: ignore
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         if self.reduce:
             x = self.reduce(x)
         identity = self.id_conv(x) if self.id_conv is not None else x
         return self.merge(self.convs(x) + identity)
 
 
-def make_stem(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_stem(cfg: ModelCfg) -> nn.Sequential:
     """Create xResnet stem -> 3 conv 3*3 instead of 1 conv 7*7"""
     len_stem = len(cfg.stem_sizes)
     stem: list[tuple[str, nn.Module]] = [
@@ -286,7 +286,7 @@ def make_stem(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
     return nn_seq(stem)
 
 
-def make_layer(cfg: ModelCfg, layer_num: int) -> nn.Sequential:  # type: ignore
+def make_layer(cfg: ModelCfg, layer_num: int) -> nn.Sequential:
     """Create layer (stage)"""
     # if no pool on stem - stride = 2 for first layer block in body
     stride = 1 if cfg.stem_pool and layer_num == 0 else 2
@@ -316,15 +316,15 @@ def make_layer(cfg: ModelCfg, layer_num: int) -> nn.Sequential:  # type: ignore
     )
 
 
-def make_body(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_body(cfg: ModelCfg) -> nn.Sequential:
     """Create model body."""
     return nn_seq(
         (f"l_{layer_num}", cfg.make_layer(cfg, layer_num))  # type: ignore
         for layer_num in range(len(cfg.layers))
     )
 
 
-def make_head(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
+def make_head(cfg: ModelCfg) -> nn.Sequential:
     """Create head."""
     head = [
         ("pool", nn.AdaptiveAvgPool2d(1)),
@@ -337,10 +337,10 @@ def make_head(cfg: ModelCfg) -> nn.Sequential:  # type: ignore
 class XResNet(ModelConstructor):
     """Base Xresnet constructor."""
 
-    make_stem: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_stem
-    make_layer: Callable[[ModelCfg, int], Union[nn.Module, nn.Sequential]] = make_layer
-    make_body: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_body
-    make_head: Callable[[ModelCfg], Union[nn.Module, nn.Sequential]] = make_head
+    make_stem: Callable[[ModelCfg], ModSeq] = make_stem
+    make_layer: Callable[[ModelCfg, int], ModSeq] = make_layer
+    make_body: Callable[[ModelCfg], ModSeq] = make_body
+    make_head: Callable[[ModelCfg], ModSeq] = make_head
     block: type[nn.Module] = XResBlock
 
 
diff --git a/src/model_constructor/xresnet.py b/src/model_constructor/xresnet.py
diff --git a/src/model_constructor/yaresnet.py b/src/model_constructor/yaresnet.py