Skip to content

Commit 3148421

Browse files
I8dNLojoein
and authored
Load from local dir (#443)
* HF sources for all models * Specific_model_path model path support * Fix hf download * fix: rollback incorrect model replacement * refactor: remove redundant type imports * refactor: replace List with list * fix: remove redundant param in late interaction text embedding * Update fastembed/common/model_management.py * fix: rollback post process onnx output --------- Co-authored-by: George Panchuk <[email protected]>
1 parent c2f6fd1 commit 3148421

File tree

9 files changed

+44
-8
lines changed

9 files changed

+44
-8
lines changed

fastembed/common/model_management.py

+4
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def download_files_from_huggingface(
114114
extra_patterns (list[str]): extra patterns to allow in the snapshot download, typically
115115
includes the required model files.
116116
local_files_only (bool, optional): Whether to only use local files. Defaults to False.
117+
specific_model_path (Optional[str], optional): The path to the model dir already pulled from an external source
117118
Returns:
118119
Path: The path to the model directory.
119120
"""
@@ -364,6 +365,9 @@ def download_model(
364365
Path: The path to the downloaded model directory.
365366
"""
366367
local_files_only = kwargs.get("local_files_only", False)
368+
specific_model_path: Optional[str] = kwargs.pop("specific_model_path", None)
369+
if specific_model_path:
370+
return Path(specific_model_path)
367371
retries = 1 if local_files_only else retries
368372
hf_source = model.get("sources", {}).get("hf")
369373
url_source = model.get("sources", {}).get("url")

fastembed/image/onnx_embedding.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Any, Iterable, Optional, Sequence, Type
22

33
import numpy as np
4-
54
from fastembed.common import ImageInput, OnnxProvider
65
from fastembed.common.onnx_model import OnnxOutputContext
76
from fastembed.common.utils import define_cache_dir, normalize
@@ -78,6 +77,7 @@ def __init__(
7877
device_ids: Optional[list[int]] = None,
7978
lazy_load: bool = False,
8079
device_id: Optional[int] = None,
80+
specific_model_path: Optional[str] = None,
8181
**kwargs,
8282
):
8383
"""
@@ -96,6 +96,7 @@ def __init__(
9696
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
9797
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
9898
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
99+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
99100
100101
Raises:
101102
ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
@@ -120,7 +121,10 @@ def __init__(
120121
self.model_description = self._get_model_description(model_name)
121122
self.cache_dir = define_cache_dir(cache_dir)
122123
self._model_dir = self.download_model(
123-
self.model_description, self.cache_dir, local_files_only=self._local_files_only
124+
self.model_description,
125+
self.cache_dir,
126+
local_files_only=self._local_files_only,
127+
specific_model_path=specific_model_path,
124128
)
125129

126130
if not self.lazy_load:
@@ -145,7 +149,7 @@ def list_supported_models(cls) -> list[dict[str, Any]]:
145149
Lists the supported models.
146150
147151
Returns:
148-
list[Dict[str, Any]]: A list of dictionaries containing the model information.
152+
list[dict[str, Any]]: A list of dictionaries containing the model information.
149153
"""
150154
return supported_onnx_models
151155

fastembed/late_interaction/colbert.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def __init__(
124124
device_ids: Optional[list[int]] = None,
125125
lazy_load: bool = False,
126126
device_id: Optional[int] = None,
127+
specific_model_path: Optional[str] = None,
127128
**kwargs,
128129
):
129130
"""
@@ -142,6 +143,7 @@ def __init__(
142143
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
143144
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
144145
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
146+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
145147
146148
Raises:
147149
ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
@@ -167,7 +169,10 @@ def __init__(
167169
self.cache_dir = define_cache_dir(cache_dir)
168170

169171
self._model_dir = self.download_model(
170-
self.model_description, self.cache_dir, local_files_only=self._local_files_only
172+
self.model_description,
173+
self.cache_dir,
174+
local_files_only=self._local_files_only,
175+
specific_model_path=specific_model_path,
171176
)
172177
self.mask_token_id = None
173178
self.pad_token_id = None

fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py

+3
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def __init__(
9595
device_ids: Optional[list[int]] = None,
9696
lazy_load: bool = False,
9797
device_id: Optional[int] = None,
98+
specific_model_path: Optional[str] = None,
9899
**kwargs: Any,
99100
):
100101
"""
@@ -113,6 +114,7 @@ def __init__(
113114
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
114115
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
115116
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
117+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
116118
117119
Raises:
118120
ValueError: If the model_name is not in the format <org>/<model> e.g. Xenova/ms-marco-MiniLM-L-6-v2.
@@ -145,6 +147,7 @@ def __init__(
145147
self.model_description,
146148
self.cache_dir,
147149
local_files_only=self._local_files_only,
150+
specific_model_path=specific_model_path,
148151
)
149152

150153
if not self.lazy_load:

fastembed/sparse/bm25.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def __init__(
108108
language: str = "english",
109109
token_max_length: int = 40,
110110
disable_stemmer: bool = False,
111+
specific_model_path: Optional[str] = None,
111112
**kwargs,
112113
):
113114
super().__init__(model_name, cache_dir, **kwargs)
@@ -125,7 +126,10 @@ def __init__(
125126
self.cache_dir = define_cache_dir(cache_dir)
126127

127128
self._model_dir = self.download_model(
128-
model_description, self.cache_dir, local_files_only=self._local_files_only
129+
model_description,
130+
self.cache_dir,
131+
local_files_only=self._local_files_only,
132+
specific_model_path=specific_model_path,
129133
)
130134

131135
self.token_max_length = token_max_length

fastembed/sparse/bm42.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def __init__(
6666
device_ids: Optional[list[int]] = None,
6767
lazy_load: bool = False,
6868
device_id: Optional[int] = None,
69+
specific_model_path: Optional[str] = None,
6970
**kwargs,
7071
):
7172
"""
@@ -86,6 +87,7 @@ def __init__(
8687
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
8788
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
8889
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
90+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
8991
9092
Raises:
9193
ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
@@ -111,7 +113,10 @@ def __init__(
111113
self.cache_dir = define_cache_dir(cache_dir)
112114

113115
self._model_dir = self.download_model(
114-
self.model_description, self.cache_dir, local_files_only=self._local_files_only
116+
self.model_description,
117+
self.cache_dir,
118+
local_files_only=self._local_files_only,
119+
specific_model_path=specific_model_path,
115120
)
116121

117122
self.invert_vocab = {}

fastembed/sparse/splade_pp.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def __init__(
7373
device_ids: Optional[list[int]] = None,
7474
lazy_load: bool = False,
7575
device_id: Optional[int] = None,
76+
specific_model_path: Optional[str] = None,
7677
**kwargs,
7778
):
7879
"""
@@ -91,6 +92,7 @@ def __init__(
9192
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
9293
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
9394
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
95+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
9496
9597
Raises:
9698
ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
@@ -115,7 +117,10 @@ def __init__(
115117
self.cache_dir = define_cache_dir(cache_dir)
116118

117119
self._model_dir = self.download_model(
118-
self.model_description, self.cache_dir, local_files_only=self._local_files_only
120+
self.model_description,
121+
self.cache_dir,
122+
local_files_only=self._local_files_only,
123+
specific_model_path=specific_model_path,
119124
)
120125

121126
if not self.lazy_load:

fastembed/text/onnx_embedding.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ def __init__(
193193
device_ids: Optional[list[int]] = None,
194194
lazy_load: bool = False,
195195
device_id: Optional[int] = None,
196+
specific_model_path: Optional[str] = None,
196197
**kwargs,
197198
):
198199
"""
@@ -211,6 +212,7 @@ def __init__(
211212
lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
212213
Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
213214
device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
215+
specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
214216
215217
Raises:
216218
ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
@@ -234,7 +236,10 @@ def __init__(
234236
self.model_description = self._get_model_description(model_name)
235237
self.cache_dir = define_cache_dir(cache_dir)
236238
self._model_dir = self.download_model(
237-
self.model_description, self.cache_dir, local_files_only=self._local_files_only
239+
self.model_description,
240+
self.cache_dir,
241+
local_files_only=self._local_files_only,
242+
specific_model_path=specific_model_path,
238243
)
239244

240245
if not self.lazy_load:

fastembed/text/onnx_text_model.py

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def _load_onnx_model(
4444
providers: Optional[Sequence[OnnxProvider]] = None,
4545
cuda: bool = False,
4646
device_id: Optional[int] = None,
47+
specific_model_path: Optional[str] = None,
4748
) -> None:
4849
super()._load_onnx_model(
4950
model_dir=model_dir,

0 commit comments

Comments
 (0)