Fixes for CLIP

AlekseySh · web-flow · commit 946255347a8c · 2024-02-03T17:09:20.000+07:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -50,9 +50,9 @@ repos:
   - id: mypy
     additional_dependencies: [types-requests==2.25.9]
 
-# check for unused code
-- repo: https://github.com/jendrikseipp/vulture
-  rev: v2.6
-  hooks:
-    - id: vulture
-      args: [--min-confidence=100, --sort-by-size, .]
+# check for unused code (todo: re-enable this hook; it is disabled because it started failing with a weird recursive error)
+#- repo: https://github.com/jendrikseipp/vulture
+#  rev: v2.6
+#  hooks:
+#    - id: vulture
+#      args: [--min-confidence=100, --sort-by-size, .]
diff --git a/oml/models/vit_clip/extractor.py b/oml/models/vit_clip/extractor.py
@@ -9,6 +9,7 @@
     filter_state_dict,
     patch_device_and_float,
     remove_criterion_in_state_dict,
+    remove_prefix_from_state_dict,
 )
 from oml.models.vit_clip.external.model import VisionTransformer
 from oml.utils.io import download_checkpoint
@@ -142,6 +143,8 @@ def __init__(
         self.normalize = normalise_features
         self.visual = self.constructors[arch]()
 
+        self.input_size = int(arch.split("_")[-1])
+
         if weights is None:
             return
         if weights in self.pretrained_models:
@@ -159,11 +162,16 @@ def __init__(
             state_dict = torch.load(Path(weights), map_location="cpu")
             state_dict = state_dict.get("state_dict", state_dict)
             state_dict = remove_criterion_in_state_dict(state_dict)
+            state_dict = remove_prefix_from_state_dict(state_dict, trial_key="class_embedding")
             state_dict = take_visual_part_of_vit_clip(state_dict, needed_keys=self.visual.state_dict().keys())
 
         self.visual.load_state_dict(state_dict=state_dict, strict=True)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        assert (x.shape[-2] == self.input_size) and (
+            x.shape[-1] == self.input_size
+        ), f"The model expects input images to be resized to {self.input_size}x{self.input_size}"
+
         res = self.visual.forward(x)
         if self.normalize:
             res = res / torch.linalg.norm(res, 2, dim=1, keepdim=True).detach()
diff --git a/tests/test_oml/test_models/test_models_creation.py b/tests/test_oml/test_models/test_models_creation.py
@@ -20,6 +20,7 @@
 vit_args = {"normalise_features": False, "use_multi_scale": False, "arch": "vits16"}
 
 
+# todo: add another test where Lightning saves the model
 @pytest.mark.parametrize(
     "constructor,args",
     [
diff --git a/tests/test_runs/test_pipelines/configs/train.yaml b/tests/test_runs/test_pipelines/configs/train.yaml
@@ -14,38 +14,32 @@ cache_size: 10
 transforms_train:
   name: augs_torch
   args:
-    im_size: 64
+    im_size: 224
 
 transforms_val:
   name: norm_resize_torch
   args:
-    im_size: 64
+    im_size: 224
 
 criterion:
   name: arcface
   args:
     smoothing_epsilon: 0
     m: 0.4
     s: 64
-    in_features: 384
+    in_features: 512
     num_classes: 4
 
 defaults:
   - optimizer: sgd
   - sampler: balance
 
 extractor:
-  name: extractor_with_mlp
+  name: vit_clip
   args:
-    mlp_features: [384]
+    arch: vitb16_224
     weights: null
-    extractor:
-      name: vit
-      args:
-        normalise_features: False
-        use_multi_scale: False
-        weights: null
-        arch: vits16
+    normalise_features: False
 
 scheduling:
   scheduler_interval: epoch
diff --git a/tests/test_runs/test_pipelines/configs/validate.yaml b/tests/test_runs/test_pipelines/configs/validate.yaml
@@ -7,7 +7,7 @@ dataframe_name: df.csv
 transforms_val:
   name: norm_resize_torch
   args:
-    im_size: 48
+    im_size: 224
 
 logs_root: logs
 
@@ -47,18 +47,11 @@ metric_args:
   visualize_only_overall_category: True
 
 extractor:
-  name: extractor_with_mlp
+  name: vit_clip
   args:
-    mlp_features: [384]
+    arch: vitb16_224
     weights: checkpoints/best.ckpt
-    extractor:
-      name: vit
-      args:
-        normalise_features: False
-        use_multi_scale: False
-        weights: null
-        arch: vits16
-
+    normalise_features: False
 
 hydra:
   run:
diff --git a/tests/test_runs/test_pipelines/test_pipelines.py b/tests/test_runs/test_pipelines/test_pipelines.py
@@ -45,7 +45,7 @@ def run(file: str, accelerator: str, devices: int, need_rm_logs: bool = True) ->
 @pytest.mark.parametrize("accelerator, devices", accelerator_devices_pairs())
 def test_train_and_validate(accelerator: str, devices: int) -> None:
     run("train.py", accelerator, devices, need_rm_logs=False)
-    # it takes checpoints from the train stage
+    # it takes checkpoints from the train stage
     run("validate.py", accelerator, devices, need_rm_logs=False)
 
     for file in ["train.py", "validate.py"]:

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`	`vit_args = {"normalise_features": False, "use_multi_scale": False, "arch": "vits16"}`
`21`	`21`
`22`	`22`
	`23`	`+# todo: add another test where Lightning saves the model`
`23`	`24`	`@pytest.mark.parametrize(`
`24`	`25`	`"constructor,args",`
`25`	`26`	`[`