Download models on startup

yotamN · yotamN · commit c99504203838 · 2025-11-13T16:26:31.000+02:00
diff --git a/src/speaches/config.py b/src/speaches/config.py
@@ -130,3 +130,9 @@ class Config(BaseSettings):
     OpenTelemetry service name for identifying this application in traces.
     Shadows OTEL_SERVICE_NAME environment variable.
     """
+
+    preload_models: list[str] = []
+    """
+    Model IDs to download when the application starts.
+    A model that fails to download is logged and skipped; startup continues.
+    """
diff --git a/src/speaches/executors/shared/registry.py b/src/speaches/executors/shared/registry.py
@@ -13,7 +13,10 @@
     pyannote_speaker_embedding_model_registry,
 )
 from speaches.executors.shared.executor import Executor
-from speaches.executors.silero_vad_v5 import SileroVADModelManager, silero_vad_model_registry
+from speaches.executors.silero_vad_v5 import (
+    SileroVADModelManager,
+    silero_vad_model_registry,
+)
 from speaches.executors.whisper import WhisperModelManager, whisper_model_registry
 
 
@@ -85,3 +88,20 @@ def all_executors(self):  # noqa: ANN201
             self._pyannote_executor,
             self._vad_executor,
         )
+
+    def download_model_by_id(self, model_id: str) -> bool:
+        """Download the files of `model_id` via the first executor whose registry lists it.
+
+        Executors are tried in `all_executors()` order.
+
+        Returns:
+            The result of that registry's `download_model_files_if_not_exist(model_id)` call.
+
+        Raises:
+            ValueError: If no executor's registry lists `model_id`. Returning `False` here would be
+                indistinguishable from a `False` produced by the download call itself.
+        """
+        for executor in self.all_executors():
+            if any(model_id == model.id for model in executor.model_registry.list_remote_models()):
+                return executor.model_registry.download_model_files_if_not_exist(model_id)
+        raise ValueError(f"Model '{model_id}' not found in registry")
diff --git a/src/speaches/main.py b/src/speaches/main.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from collections.abc import AsyncGenerator
+from contextlib import asynccontextmanager
 import logging
 import os
 import uuid
@@ -19,7 +21,7 @@
 from starlette.exceptions import HTTPException as StarletteHTTPException
 from starlette.responses import RedirectResponse
 
-from speaches.dependencies import ApiKeyDependency, get_config
+from speaches.dependencies import ApiKeyDependency, get_config, get_executor_registry
 from speaches.logger import setup_logger
 from speaches.routers.chat import (
     router as chat_router,
@@ -66,6 +68,26 @@
 ]
 
 
+@asynccontextmanager
+async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
+    """Download every model listed in `config.preload_models`, then yield control to the application."""
+    log = logging.getLogger(__name__)
+    model_ids = get_config().preload_models
+    if model_ids:
+        log.info(f"Preloading {len(model_ids)} models on startup")
+        registry = get_executor_registry()
+        for model_id in model_ids:
+            try:
+                log.info(f"Downloading model: {model_id}")
+                # NOTE(review): the bool result is ignored, so a False return is still logged as success.
+                registry.download_model_by_id(model_id)
+                log.info(f"Successfully downloaded model: {model_id}")
+            except Exception:
+                log.exception(f"Failed to download model {model_id}")
+
+    yield
+
+
 def create_app() -> FastAPI:
     config = get_config()  # HACK
     setup_logger(config.log_level)
@@ -94,6 +116,7 @@ def create_app() -> FastAPI:
         version="0.8.3",  # TODO: update this on release
         license_info={"name": "MIT License", "identifier": "MIT"},
         openapi_tags=TAGS_METADATA,
+        lifespan=lifespan,
     )
 
     # Instrument FastAPI app if telemetry is enabled
diff --git a/src/speaches/registry_utils.py b/src/speaches/registry_utils.py
@@ -0,0 +1,30 @@
+def download_model_by_id(model_id: str) -> bool:
+    """Download `model_id` from the first model registry that lists it.
+
+    The Kokoro, Piper, Whisper and Parakeet registries are consulted in that order.
+
+    Returns:
+        The result of that registry's `download_model_files_if_not_exist(model_id)` call.
+
+    Raises:
+        ValueError: If none of the registries lists `model_id`.
+    """
+    # Function-scope imports: the registry modules are only imported when this helper is called.
+    from speaches.executors.kokoro.utils import model_registry as kokoro_model_registry
+    from speaches.executors.parakeet.utils import model_registry as parakeet_model_registry
+    from speaches.executors.piper.utils import model_registry as piper_model_registry
+    from speaches.executors.whisper.utils import model_registry as whisper_model_registry
+
+    # NOTE(review): only these four registries are searched; confirm whether others should be included.
+    # The first registry that lists the id wins, so the order below is significant.
+    lookup_order = (
+        kokoro_model_registry,
+        piper_model_registry,
+        whisper_model_registry,
+        parakeet_model_registry,
+    )
+    for registry in lookup_order:
+        remote_ids = {model.id for model in registry.list_remote_models()}
+        if model_id in remote_ids:
+            return registry.download_model_files_if_not_exist(model_id)
+    raise ValueError(f"Model '{model_id}' not found in registry")
diff --git a/src/speaches/routers/models.py b/src/speaches/routers/models.py
@@ -40,7 +40,9 @@ class ListAudioModelsResponse(BaseModel):
 
 # HACK: returning ListModelsResponse directly causes extra `Model` fields to be omitted
 @router.get("/v1/audio/models", response_model=ListAudioModelsResponse)
-def list_local_audio_models(executor_registry: ExecutorRegistryDependency) -> JSONResponse:
+def list_local_audio_models(
+    executor_registry: ExecutorRegistryDependency,
+) -> JSONResponse:
     models: list[Model] = []
     for executor in executor_registry.text_to_speech:
         models.extend(list(executor.model_registry.list_local_models()))
@@ -53,7 +55,9 @@ class ListVoicesResponse(BaseModel):
 
 # HACK: returning ListModelsResponse directly causes extra `Model` fields to be omitted
 @router.get("/v1/audio/voices", response_model=ListModelsResponse)
-def list_local_audio_voices(executor_registry: ExecutorRegistryDependency) -> JSONResponse:
+def list_local_audio_voices(
+    executor_registry: ExecutorRegistryDependency,
+) -> JSONResponse:
     models: list[KokoroModel | PiperModel] = []
     for executor in executor_registry.text_to_speech:
         models.extend(list(executor.model_registry.list_local_models()))