darkshapes
diff --git a/‎README.md‎
Lines changed: 4 additions & 3 deletions b/‎README.md‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎divisor/app.py‎
Lines changed: 42 additions & 11 deletions b/‎divisor/app.py‎
Lines changed: 42 additions & 11 deletions
diff --git a/‎divisor/commands.py‎
Lines changed: 6 additions & 5 deletions b/‎divisor/commands.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎divisor/flux_modules/image_embedder.py‎
Lines changed: 8 additions & 22 deletions b/‎divisor/flux_modules/image_embedder.py‎
Lines changed: 8 additions & 22 deletions
@@ -1,14 +1,15 @@
-Generate:
-
 ```
 uvx --from "divisor @ git+https://github.com/darkshapes/divisor" divisor
 ```
 
-Develop:
+or
 
 ```
 git clone https://github.com/darkshapes/divisor
 cd divisor
 uv sync --dev
 dvzr
 ```
+
+[![dvzr pytest](https://github.com/darkshapes/divisor/actions/workflows/divisor.yml/badge.svg)](https://github.com/darkshapes/divisor/actions/workflows/divisor.yml)<br>
+[<img src="https://img.shields.io/badge/me-__?logo=kofi&logoColor=white&logoSize=auto&label=feed&labelColor=maroon&color=grey&link=https%3A%2F%2Fko-fi.com%2Fdarkshapes">](https://ko-fi.com/darkshapes)<br>
@@ -6,6 +6,7 @@
 Routes to different inference modes based on flags.
 """
 
+import argparse
 import sys
 from fire import Fire
 
@@ -14,21 +15,51 @@ def main():
     """Main entry point that routes to appropriate inference function.
 
     Usage:
-        dvzr                    # Default: Flux image generation mode
-        dvzr -o / --omni        # DiMOO multimodal understanding mode
+        dvzr                                    # Default: Flux image generation mode
+        dvzr -o / --omni                        # DiMOO multimodal understanding mode
+        dvzr --model-type dev                   # Use flux1-dev model (default)
+        dvzr --model-type schnell               # Use flux1-schnell model
+        dvzr -m dev                             # Short form for model type
     """
-    # Check for --omni or -o flag (as standalone arguments, not part of other args)
-    has_omni_flag = any(arg in ["-o", "--omni"] for arg in sys.argv)
+    parser = argparse.ArgumentParser(description="Divisor CLI - Flux image generation and multimodal understanding")
+    parser.add_argument(
+        "-o",
+        "--omni",
+        action="store_true",
+        help="Enable DiMOO multimodal understanding mode",
+    )
+    parser.add_argument(
+        "-m",
+        "--model-type",
+        choices=["dev", "schnell"],
+        default="dev",
+        help="Model type to use: 'dev' (flux1-dev) or 'schnell' (flux1-schnell). Default: dev",
+    )
 
-    if has_omni_flag:
-        original_argv = sys.argv.copy()
-        filtered_argv = [arg for arg in original_argv if arg not in ["-o", "--omni"]]
-        sys.argv = filtered_argv
+    # Parse known args to separate our args from Fire's args
+    args, remaining_argv = parser.parse_known_args()
 
-    from divisor.flux_modules.prompt import main as flux_main
+    if args.omni:
+        # Remove --omni/-o from argv and route to omni mode
+        filtered_argv = [arg for arg in sys.argv[1:] if arg not in ["-o", "--omni"]]
+        sys.argv = [sys.argv[0]] + filtered_argv
+        # TODO: Import and call omni main function when implemented
+        # from divisor.omni_modules.prompt import main as omni_main
+        # Fire(omni_main)
+        raise NotImplementedError("Omni mode not yet implemented")
+    else:
+        # Route to Flux mode
+        from divisor.flux_modules.prompt import main as flux_main
 
-    # Flux uses Fire, which automatically handles sys.argv
-    Fire(flux_main)
+        # Add model_id argument to remaining argv for Fire to parse
+        # Fire treats hyphens and underscores in flag names interchangeably, so --model-id sets model_id
+        model_id = f"flux1-{args.model_type}"
+        # Insert model_id argument before other arguments
+        remaining_argv = ["--model-id", model_id] + remaining_argv
+        sys.argv = [sys.argv[0]] + remaining_argv
+
+        # Flux uses Fire, which automatically handles sys.argv
+        Fire(flux_main)
 
 
 if __name__ == "__main__":
 
@@ -73,8 +73,7 @@ def process_choice(
     if state.prompt is not None:
         prompt_display = state.prompt[:60] + "..." if len(state.prompt) > 60 else state.prompt
         nfo(f"[P]rompt: {prompt_display}")
-
-    choice = input(": [BDGLSRVXP] advance with Enter: ").lower().strip()
+        nfo(f"[E]dit Mode (REPL): {prompt_display}")
 
     choice_handlers = {
         "": lambda: (
@@ -84,16 +83,18 @@ def process_choice(
             controller.current_state,
         ),
         "g": lambda: change_guidance(controller, state, clear_prediction_cache),
-        "l": lambda: change_layer_dropout(controller, state, current_layer_dropout, clear_prediction_cache),
-        "r": lambda: change_resolution(controller, state, clear_prediction_cache),
         "s": lambda: change_seed(controller, state, rng, clear_prediction_cache),
+        "r": lambda: change_resolution(controller, state, clear_prediction_cache),
+        "l": lambda: change_layer_dropout(controller, state, current_layer_dropout, clear_prediction_cache),
         "b": lambda: toggle_buffer_mask(controller, state),
         "a": lambda: change_vae_offset(controller, state, ae, clear_prediction_cache),
         "v": lambda: change_variation(controller, state, variation_rng, clear_prediction_cache),
         "d": lambda: toggle_deterministic(controller, state, clear_prediction_cache),
-        "e": lambda: edit_mode(clear_prediction_cache),
         "p": lambda: change_prompt(controller, state, clear_prediction_cache, recompute_text_embeddings),
+        "e": lambda: edit_mode(clear_prediction_cache),
     }
+    prompt = "".join(key.upper() for key in choice_handlers if key)
+    choice = input(f": [{prompt}] or advance with Enter: ").lower().strip()
 
     if choice in choice_handlers:
         result = choice_handlers[choice]()
 
@@ -15,17 +15,15 @@
     SiglipVisionModel,
 )
 
-from divisor.flux_modules.util import print_load_warning
+from divisor.flux_modules.loading import print_load_warning
 
 
 class DepthImageEncoder:
     depth_model_name = "LiheYoung/depth-anything-large-hf"
 
     def __init__(self, device):
         self.device = device
-        self.depth_model = AutoModelForDepthEstimation.from_pretrained(
-            self.depth_model_name
-        ).to(device)
+        self.depth_model = AutoModelForDepthEstimation.from_pretrained(self.depth_model_name).to(device)
         self.processor = AutoProcessor.from_pretrained(self.depth_model_name)
 
     def __call__(self, img: torch.Tensor) -> torch.Tensor:
@@ -37,9 +35,7 @@ def __call__(self, img: torch.Tensor) -> torch.Tensor:
         img = self.processor(img_byte, return_tensors="pt")["pixel_values"]
         depth = self.depth_model(img.to(self.device)).predicted_depth
         depth = repeat(depth, "b h w -> b 3 h w")
-        depth = torch.nn.functional.interpolate(
-            depth, hw, mode="bicubic", antialias=True
-        )
+        depth = torch.nn.functional.interpolate(depth, hw, mode="bicubic", antialias=True)
 
         depth = depth / 127.5 - 1.0
         return depth
@@ -87,34 +83,24 @@ def __init__(
         super().__init__()
 
         self.redux_dim = redux_dim
-        self.device = (
-            device if isinstance(device, torch.device) else torch.device(device)
-        )
+        self.device = device if isinstance(device, torch.device) else torch.device(device)
         self.dtype = dtype
 
         with self.device:
             self.redux_up = nn.Linear(redux_dim, txt_in_features * 3, dtype=dtype)
-            self.redux_down = nn.Linear(
-                txt_in_features * 3, txt_in_features, dtype=dtype
-            )
+            self.redux_down = nn.Linear(txt_in_features * 3, txt_in_features, dtype=dtype)
 
             sd = load_sft(redux_path, device=str(device))
             missing, unexpected = self.load_state_dict(sd, strict=False, assign=True)
             print_load_warning(missing, unexpected)
 
-            self.siglip = SiglipVisionModel.from_pretrained(self.siglip_model_name).to(
-                dtype=dtype
-            )
+            self.siglip = SiglipVisionModel.from_pretrained(self.siglip_model_name).to(dtype=dtype)
         self.normalize = SiglipImageProcessor.from_pretrained(self.siglip_model_name)
 
     def __call__(self, x: Image.Image) -> torch.Tensor:
-        imgs = self.normalize.preprocess(
-            images=[x], do_resize=True, return_tensors="pt", do_convert_rgb=True
-        )
+        imgs = self.normalize.preprocess(images=[x], do_resize=True, return_tensors="pt", do_convert_rgb=True)
 
-        _encoded_x = self.siglip(
-            **imgs.to(device=self.device, dtype=self.dtype)
-        ).last_hidden_state
+        _encoded_x = self.siglip(**imgs.to(device=self.device, dtype=self.dtype)).last_hidden_state
 
         projected_x = self.redux_down(nn.functional.silu(self.redux_up(_encoded_x)))