Download models on startup

yotamN · fedirz · commit fa1e0d585259 · 2025-11-30T08:25:06.000-08:00
diff --git a/src/speaches/config.py b/src/speaches/config.py
@@ -130,3 +130,11 @@ class Config(BaseSettings):
     OpenTelemetry service name for identifying this application in traces.
     Shadows OTEL_SERVICE_NAME environment variable.
     """
+
+    preload_models: list[str] = []
+    """
+    List of model IDs to download during application startup.
+    Models will be downloaded sequentially if they do not already exist locally.
+    Application will exit if any model fails to download or is not found in the registry.
+    Example: ["Systran/faster-whisper-tiny", "rhasspy/piper-voices"]
+    """
diff --git a/src/speaches/executors/shared/registry.py b/src/speaches/executors/shared/registry.py
@@ -13,7 +13,10 @@
     pyannote_speaker_embedding_model_registry,
 )
 from speaches.executors.shared.executor import Executor
-from speaches.executors.silero_vad_v5 import SileroVADModelManager, silero_vad_model_registry
+from speaches.executors.silero_vad_v5 import (
+    SileroVADModelManager,
+    silero_vad_model_registry,
+)
 from speaches.executors.whisper import WhisperModelManager, whisper_model_registry
 
 
@@ -85,3 +88,15 @@ def all_executors(self):  # noqa: ANN201
             self._pyannote_executor,
             self._vad_executor,
         )
+
+    def download_model_by_id(self, model_id: str) -> bool:
+        """Download `model_id` through the first executor whose registry lists it remotely.
+
+        Returns whatever that registry's `download_model_files_if_not_exist` returns
+        (callers treat a truthy value as "newly downloaded").
+        Raises `ValueError` when no executor lists the model.
+        """
+        for executor in self.all_executors():
+            if any(model.id == model_id for model in executor.model_registry.list_remote_models()):
+                return executor.model_registry.download_model_files_if_not_exist(model_id)
+        raise ValueError(f"Model '{model_id}' not found")
diff --git a/src/speaches/main.py b/src/speaches/main.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from collections.abc import AsyncGenerator
+from contextlib import asynccontextmanager
 import logging
 import os
 import uuid
@@ -19,7 +21,7 @@
 from starlette.exceptions import HTTPException as StarletteHTTPException
 from starlette.responses import RedirectResponse
 
-from speaches.dependencies import ApiKeyDependency, get_config
+from speaches.dependencies import ApiKeyDependency, get_config, get_executor_registry
 from speaches.logger import setup_logger
 from speaches.routers.chat import (
     router as chat_router,
@@ -66,6 +68,23 @@
 ]
 
 
+@asynccontextmanager
+async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
+    """Download `config.preload_models` before serving; any failure propagates out of startup."""
+    logger = logging.getLogger(__name__)
+    config = get_config()
+    if config.preload_models:
+        logger.info(f"Preloading {len(config.preload_models)} models on startup")
+        executor_registry = get_executor_registry()
+        for model_id in config.preload_models:
+            logger.info(f"Downloading model: {model_id}")
+            if executor_registry.download_model_by_id(model_id):
+                logger.info(f"Successfully downloaded model: {model_id}")
+            else:  # falsy result: the files were already present, nothing was fetched
+                logger.info(f"Model already exists locally: {model_id}")
+    yield
+
+
 def create_app() -> FastAPI:
     config = get_config()  # HACK
     setup_logger(config.log_level)
@@ -94,6 +113,7 @@ def create_app() -> FastAPI:
         version="0.8.3",  # TODO: update this on release
         license_info={"name": "MIT License", "identifier": "MIT"},
         openapi_tags=TAGS_METADATA,
+        lifespan=lifespan,
     )
 
     # Instrument FastAPI app if telemetry is enabled
diff --git a/src/speaches/routers/models.py b/src/speaches/routers/models.py
@@ -40,7 +40,9 @@ class ListAudioModelsResponse(BaseModel):
 
 # HACK: returning ListModelsResponse directly causes extra `Model` fields to be omitted
 @router.get("/v1/audio/models", response_model=ListAudioModelsResponse)
-def list_local_audio_models(executor_registry: ExecutorRegistryDependency) -> JSONResponse:
+def list_local_audio_models(
+    executor_registry: ExecutorRegistryDependency,
+) -> JSONResponse:
     models: list[Model] = []
     for executor in executor_registry.text_to_speech:
         models.extend(list(executor.model_registry.list_local_models()))
@@ -53,7 +55,9 @@ class ListVoicesResponse(BaseModel):
 
 # HACK: returning ListModelsResponse directly causes extra `Model` fields to be omitted
 @router.get("/v1/audio/voices", response_model=ListModelsResponse)
-def list_local_audio_voices(executor_registry: ExecutorRegistryDependency) -> JSONResponse:
+def list_local_audio_voices(
+    executor_registry: ExecutorRegistryDependency,
+) -> JSONResponse:
     models: list[KokoroModel | PiperModel] = []
     for executor in executor_registry.text_to_speech:
         models.extend(list(executor.model_registry.list_local_models()))
@@ -77,14 +81,14 @@ def get_local_model(executor_registry: ExecutorRegistryDependency, model_id: Mod
 # NOTE: without `response_model` and `JSONResponse` extra fields aren't included in the response
 @router.post("/v1/models/{model_id:path}")
 def download_remote_model(executor_registry: ExecutorRegistryDependency, model_id: ModelId) -> Response:
-    for executor in executor_registry.all_executors():
-        if model_id in [model.id for model in executor.model_registry.list_remote_models()]:
-            was_downloaded = executor.model_registry.download_model_files_if_not_exist(model_id)
-            if was_downloaded:
-                return Response(status_code=200, content=f"Model '{model_id}' downloaded")
-            else:
-                return Response(status_code=201, content=f"Model '{model_id}' already exists")
-    raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
+    # NOTE(review): a ValueError raised by the download itself would also surface as this 404 - confirm acceptable.
+    try:
+        was_downloaded = executor_registry.download_model_by_id(model_id)
+    except ValueError as error:  # raised by the registry when no executor lists `model_id`
+        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found") from error
+    if was_downloaded:
+        return Response(status_code=200, content=f"Model '{model_id}' downloaded")
+    return Response(status_code=201, content=f"Model '{model_id}' already exists")
 
 
 # TODO: document that any model will be deleted regardless if it's supported speaches or not