
Commit 572f050

Swin and FocalNet weights on HF hub. Add model deprecation functionality w/ some registry tweaks.

Parent: 2fc5ac3

22 files changed: +546 -435 lines

timm/models/__init__.py  (+5 -5)

@@ -74,12 +74,12 @@
 from ._features_fx import FeatureGraphNet, GraphExtractNet, create_feature_extractor, \
     register_notrace_module, is_notrace_module, get_notrace_modules, \
     register_notrace_function, is_notrace_function, get_notrace_functions
-from ._helpers import clean_state_dict, load_state_dict, load_checkpoint, remap_checkpoint, resume_checkpoint
+from ._helpers import clean_state_dict, load_state_dict, load_checkpoint, remap_state_dict, resume_checkpoint
 from ._hub import load_model_config_from_hf, load_state_dict_from_hf, push_to_hf_hub
 from ._manipulate import model_parameters, named_apply, named_modules, named_modules_with_params, \
     group_modules, group_parameters, checkpoint_seq, adapt_input_conv
-from ._pretrained import PretrainedCfg, DefaultCfg, \
-    filter_pretrained_cfg, generate_default_cfgs, split_model_name_tag
+from ._pretrained import PretrainedCfg, DefaultCfg, filter_pretrained_cfg
 from ._prune import adapt_model_from_string
-from ._registry import register_model, model_entrypoint, list_models, list_pretrained, is_model, list_modules, \
-    is_model_in_modules, is_model_pretrained, get_pretrained_cfg, get_pretrained_cfg_value
+from ._registry import split_model_name_tag, get_arch_name, generate_default_cfgs, register_model, \
+    register_model_deprecations, model_entrypoint, list_models, list_pretrained, get_deprecated_models, \
+    is_model, list_modules, is_model_in_modules, is_model_pretrained, get_pretrained_cfg, get_pretrained_cfg_value
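
The commit's headline registry change surfaces here: split_model_name_tag, generate_default_cfgs, register_model_deprecations and get_deprecated_models are now imported from ._registry. As a rough sketch of the re-exported query helpers (the wildcard patterns and module name are illustrative, and get_deprecated_models is assumed to take a module name and return an old-to-new name mapping):

import timm  # importing timm populates the model registry
from timm.models import list_models, list_pretrained, get_deprecated_models

# registry queries are re-exported at the timm.models package level after this change
print(list_models('focalnet*')[:3])        # architecture names matching a wildcard
print(list_pretrained('swin_base*')[:3])   # 'model.tag' names that have pretrained weights

# assumed behaviour: mapping of deprecated names registered by a model module
print(get_deprecated_models('swin_transformer'))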

timm/models/_factory.py  (+2 -2)

@@ -3,10 +3,10 @@
 from urllib.parse import urlsplit

 from timm.layers import set_layer_config
-from ._pretrained import PretrainedCfg, split_model_name_tag
 from ._helpers import load_checkpoint
 from ._hub import load_model_config_from_hf
-from ._registry import is_model, model_entrypoint
+from ._pretrained import PretrainedCfg
+from ._registry import is_model, model_entrypoint, split_model_name_tag


 __all__ = ['parse_model_name', 'safe_model_name', 'create_model']
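
For _factory.py the change is only an import shuffle (split_model_name_tag now comes from ._registry), so callers are unaffected. A small sketch of the helpers this module exports, assuming parse_model_name and safe_model_name remain re-exported from timm.models (the hub id below is hypothetical):

from timm.models import parse_model_name, safe_model_name

# parse_model_name splits an optional source prefix from the model identifier
print(parse_model_name('hf-hub:timm/some-model'))  # ('hf-hub', 'timm/some-model')
print(parse_model_name('resnet50'))                # ('timm', 'resnet50')

# safe_model_name returns a filesystem-friendly form of the name
print(safe_model_name('resnet50.a1_in1k'))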

timm/models/_helpers.py  (+38 -14)

@@ -5,6 +5,7 @@
 import logging
 import os
 from collections import OrderedDict
+from typing import Any, Callable, Dict, Optional, Union

 import torch
 try:
@@ -13,30 +14,32 @@
 except ImportError:
     _has_safetensors = False

-import timm.models._builder
-
 _logger = logging.getLogger(__name__)

-__all__ = ['clean_state_dict', 'load_state_dict', 'load_checkpoint', 'remap_checkpoint', 'resume_checkpoint']
+__all__ = ['clean_state_dict', 'load_state_dict', 'load_checkpoint', 'remap_state_dict', 'resume_checkpoint']


-def clean_state_dict(state_dict):
+def clean_state_dict(state_dict: Dict[str, Any]) -> Dict[str, Any]:
     # 'clean' checkpoint by removing .module prefix from state dict if it exists from parallel training
-    cleaned_state_dict = OrderedDict()
+    cleaned_state_dict = {}
     for k, v in state_dict.items():
         name = k[7:] if k.startswith('module.') else k
         cleaned_state_dict[name] = v
     return cleaned_state_dict


-def load_state_dict(checkpoint_path, use_ema=True):
+def load_state_dict(
+        checkpoint_path: str,
+        use_ema: bool = True,
+        device: Union[str, torch.device] = 'cpu',
+) -> Dict[str, Any]:
     if checkpoint_path and os.path.isfile(checkpoint_path):
         # Check if safetensors or not and load weights accordingly
         if str(checkpoint_path).endswith(".safetensors"):
             assert _has_safetensors, "`pip install safetensors` to use .safetensors"
-            checkpoint = safetensors.torch.load_file(checkpoint_path, device='cpu')
+            checkpoint = safetensors.torch.load_file(checkpoint_path, device=device)
         else:
-            checkpoint = torch.load(checkpoint_path, map_location='cpu')
+            checkpoint = torch.load(checkpoint_path, map_location=device)

         state_dict_key = ''
         if isinstance(checkpoint, dict):
@@ -56,22 +59,37 @@ def load_state_dict(checkpoint_path, use_ema=True):
     raise FileNotFoundError()


-def load_checkpoint(model, checkpoint_path, use_ema=True, strict=True, remap=False):
+def load_checkpoint(
+        model: torch.nn.Module,
+        checkpoint_path: str,
+        use_ema: bool = True,
+        device: Union[str, torch.device] = 'cpu',
+        strict: bool = True,
+        remap: bool = False,
+        filter_fn: Optional[Callable] = None,
+):
     if os.path.splitext(checkpoint_path)[-1].lower() in ('.npz', '.npy'):
         # numpy checkpoint, try to load via model specific load_pretrained fn
         if hasattr(model, 'load_pretrained'):
-            timm.models._model_builder.load_pretrained(checkpoint_path)
+            model.load_pretrained(checkpoint_path)
         else:
             raise NotImplementedError('Model cannot load numpy checkpoint')
         return
-    state_dict = load_state_dict(checkpoint_path, use_ema)
+
+    state_dict = load_state_dict(checkpoint_path, use_ema, device=device)
     if remap:
-        state_dict = remap_checkpoint(model, state_dict)
+        state_dict = remap_state_dict(state_dict, model)
+    elif filter_fn:
+        state_dict = filter_fn(state_dict, model)
     incompatible_keys = model.load_state_dict(state_dict, strict=strict)
     return incompatible_keys


-def remap_checkpoint(model, state_dict, allow_reshape=True):
+def remap_state_dict(
+        state_dict: Dict[str, Any],
+        model: torch.nn.Module,
+        allow_reshape: bool = True
+):
     """ remap checkpoint by iterating over state dicts in order (ignoring original keys).
     This assumes models (and originating state dict) were created with params registered in same order.
     """
@@ -87,7 +105,13 @@ def remap_checkpoint(model, state_dict, allow_reshape=True):
     return out_dict


-def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
+def resume_checkpoint(
+        model: torch.nn.Module,
+        checkpoint_path: str,
+        optimizer: torch.optim.Optimizer = None,
+        loss_scaler: Any = None,
+        log_info: bool = True,
+):
     resume_epoch = None
     if os.path.isfile(checkpoint_path):
         checkpoint = torch.load(checkpoint_path, map_location='cpu')
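
load_checkpoint gains device and filter_fn arguments here, and remap_checkpoint becomes remap_state_dict with a (state_dict, model) argument order. A minimal sketch of the new signature in use; the checkpoint path and the filter function below are hypothetical, not part of the commit:

import timm
from timm.models import load_checkpoint

model = timm.create_model('resnet50')

def drop_classifier(state_dict, model):
    # hypothetical filter_fn: drop classifier weights so a differently-sized head can be loaded
    return {k: v for k, v in state_dict.items() if not k.startswith('fc.')}

# load onto CPU via the new device arg and apply the filter; filter_fn only runs when remap is False
load_checkpoint(model, 'checkpoint.pth.tar', device='cpu', strict=False, filter_fn=drop_classifier)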

timm/models/_manipulate.py  (+36 -16)

@@ -3,7 +3,7 @@
 import re
 from collections import defaultdict
 from itertools import chain
-from typing import Callable, Union, Dict
+from typing import Any, Callable, Dict, Iterator, Tuple, Type, Union

 import torch
 from torch import nn as nn
@@ -13,15 +13,20 @@
     'group_with_matcher', 'group_modules', 'group_parameters', 'flatten_modules', 'checkpoint_seq']


-def model_parameters(model, exclude_head=False):
+def model_parameters(model: nn.Module, exclude_head: bool = False):
     if exclude_head:
         # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering
         return [p for p in model.parameters()][:-2]
     else:
         return model.parameters()


-def named_apply(fn: Callable, module: nn.Module, name='', depth_first=True, include_root=False) -> nn.Module:
+def named_apply(
+        fn: Callable,
+        module: nn.Module, name='',
+        depth_first: bool = True,
+        include_root: bool = False,
+) -> nn.Module:
     if not depth_first and include_root:
         fn(module=module, name=name)
     for child_name, child_module in module.named_children():
@@ -32,7 +37,12 @@ def named_apply(fn: Callable, module: nn.Module, name='', depth_first=True, incl
     return module


-def named_modules(module: nn.Module, name='', depth_first=True, include_root=False):
+def named_modules(
+        module: nn.Module,
+        name: str = '',
+        depth_first: bool = True,
+        include_root: bool = False,
+):
     if not depth_first and include_root:
         yield name, module
     for child_name, child_module in module.named_children():
@@ -43,7 +53,12 @@ def named_modules(module: nn.Module, name='', depth_first=True, include_root=Fal
         yield name, module


-def named_modules_with_params(module: nn.Module, name='', depth_first=True, include_root=False):
+def named_modules_with_params(
+        module: nn.Module,
+        name: str = '',
+        depth_first: bool = True,
+        include_root: bool = False,
+):
     if module._parameters and not depth_first and include_root:
         yield name, module
     for child_name, child_module in module.named_children():
@@ -58,9 +73,9 @@ def named_modules_with_params(module: nn.Module, name='', depth_first=True, incl


 def group_with_matcher(
-        named_objects,
+        named_objects: Iterator[Tuple[str, Any]],
         group_matcher: Union[Dict, Callable],
-        output_values: bool = False,
+        return_values: bool = False,
         reverse: bool = False
 ):
     if isinstance(group_matcher, dict):
@@ -96,7 +111,7 @@ def _get_grouping(name):
     # map layers into groups via ordinals (ints or tuples of ints) from matcher
     grouping = defaultdict(list)
     for k, v in named_objects:
-        grouping[_get_grouping(k)].append(v if output_values else k)
+        grouping[_get_grouping(k)].append(v if return_values else k)

     # remap to integers
     layer_id_to_param = defaultdict(list)
@@ -107,7 +122,7 @@ def _get_grouping(name):
         layer_id_to_param[lid].extend(grouping[k])

     if reverse:
-        assert not output_values, "reverse mapping only sensible for name output"
+        assert not return_values, "reverse mapping only sensible for name output"
         # output reverse mapping
         param_to_layer_id = {}
         for lid, lm in layer_id_to_param.items():
@@ -121,24 +136,29 @@ def _get_grouping(name):
 def group_parameters(
         module: nn.Module,
         group_matcher,
-        output_values=False,
-        reverse=False,
+        return_values: bool = False,
+        reverse: bool = False,
 ):
     return group_with_matcher(
-        module.named_parameters(), group_matcher, output_values=output_values, reverse=reverse)
+        module.named_parameters(), group_matcher, return_values=return_values, reverse=reverse)


 def group_modules(
         module: nn.Module,
         group_matcher,
-        output_values=False,
-        reverse=False,
+        return_values: bool = False,
+        reverse: bool = False,
 ):
     return group_with_matcher(
-        named_modules_with_params(module), group_matcher, output_values=output_values, reverse=reverse)
+        named_modules_with_params(module), group_matcher, return_values=return_values, reverse=reverse)


-def flatten_modules(named_modules, depth=1, prefix='', module_types='sequential'):
+def flatten_modules(
+        named_modules: Iterator[Tuple[str, nn.Module]],
+        depth: int = 1,
+        prefix: Union[str, Tuple[str, ...]] = '',
+        module_types: Union[str, Tuple[Type[nn.Module]]] = 'sequential',
+):
     prefix_is_tuple = isinstance(prefix, tuple)
     if isinstance(module_types, str):
         if module_types == 'container':
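
Beyond the added type annotations, the user-visible change in this file is the rename of output_values to return_values in group_with_matcher, group_parameters and group_modules. A short sketch of the renamed argument, assuming the model's existing group_matcher() method supplies the matcher:

import timm
from timm.models import group_parameters

model = timm.create_model('resnet18')
matcher = model.group_matcher(coarse=True)  # dict/callable describing how to bucket layer names

# group parameter names by layer id; return_values=True (renamed from output_values) returns the tensors instead
groups = group_parameters(model, matcher, return_values=False)
for layer_id, names in sorted(groups.items()):
    print(layer_id, len(names))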

timm/models/_pretrained.py  (+1 -39)

@@ -4,7 +4,7 @@
 from typing import Any, Deque, Dict, Tuple, Optional, Union


-__all__ = ['PretrainedCfg', 'filter_pretrained_cfg', 'DefaultCfg', 'split_model_name_tag', 'generate_default_cfgs']
+__all__ = ['PretrainedCfg', 'filter_pretrained_cfg', 'DefaultCfg']


 @dataclass
@@ -91,41 +91,3 @@ def default(self):
     def default_with_tag(self):
         tag = self.tags[0]
         return tag, self.cfgs[tag]
-
-
-def split_model_name_tag(model_name: str, no_tag: str = '') -> Tuple[str, str]:
-    model_name, *tag_list = model_name.split('.', 1)
-    tag = tag_list[0] if tag_list else no_tag
-    return model_name, tag
-
-
-def generate_default_cfgs(cfgs: Dict[str, Union[Dict[str, Any], PretrainedCfg]]):
-    out = defaultdict(DefaultCfg)
-    default_set = set()  # no tag and tags ending with * are prioritized as default
-
-    for k, v in cfgs.items():
-        if isinstance(v, dict):
-            v = PretrainedCfg(**v)
-        has_weights = v.has_weights
-
-        model, tag = split_model_name_tag(k)
-        is_default_set = model in default_set
-        priority = (has_weights and not tag) or (tag.endswith('*') and not is_default_set)
-        tag = tag.strip('*')
-
-        default_cfg = out[model]
-
-        if priority:
-            default_cfg.tags.appendleft(tag)
-            default_set.add(model)
-        elif has_weights and not default_cfg.is_pretrained:
-            default_cfg.tags.appendleft(tag)
-        else:
-            default_cfg.tags.append(tag)
-
-        if has_weights:
-            default_cfg.is_pretrained = True
-
-        default_cfg.cfgs[tag] = v
-
-    return out
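
split_model_name_tag and generate_default_cfgs are removed here because they moved to _registry (see the __init__.py hunk above); their behaviour is unchanged. Based on the removed implementation, a quick sketch of the name/tag split (the model names are just examples):

from timm.models import split_model_name_tag  # now re-exported via timm.models._registry

# the name is split on the first '.', anything after it becomes the pretrained tag
print(split_model_name_tag('swin_base_patch4_window7_224.ms_in22k'))  # ('swin_base_patch4_window7_224', 'ms_in22k')
print(split_model_name_tag('focalnet_tiny_srf'))                      # ('focalnet_tiny_srf', '')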
