Skip to content

Commit 5461012

Browse files
authored
new: add bm25, fix param propagation in parallel mode, fix bm42 parallel (#274)
* new: add bm25, fix param propagation in parallel mode, fix bm42 parallel
* refactoring: remove redundant example
* fix: fix mp start method in bm25
* refactoring: refactor token id generation
* new: replace model repository
1 parent 29cfcda commit 5461012

18 files changed

+488
-92
lines changed

fastembed/common/onnx_model.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -92,22 +92,21 @@ def init_embedding(
9292
self,
9393
model_name: str,
9494
cache_dir: str,
95+
**kwargs,
9596
) -> OnnxModel:
9697
raise NotImplementedError()
9798

9899
def __init__(
99100
self,
100101
model_name: str,
101102
cache_dir: str,
103+
**kwargs,
102104
):
103-
self.model = self.init_embedding(model_name, cache_dir)
105+
self.model = self.init_embedding(model_name, cache_dir, **kwargs)
104106

105107
@classmethod
106108
def start(cls, model_name: str, cache_dir: str, **kwargs: Any) -> "EmbeddingWorker":
107-
return cls(
108-
model_name=model_name,
109-
cache_dir=cache_dir,
110-
)
109+
return cls(model_name=model_name, cache_dir=cache_dir, **kwargs)
111110

112111
def process(self, items: Iterable[Tuple[int, Any]]) -> Iterable[Tuple[int, Any]]:
113112
raise NotImplementedError("Subclasses must implement this method")

fastembed/image/image_embedding.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def __init__(
5353
supported_models = EMBEDDING_MODEL_TYPE.list_supported_models()
5454
if any(model_name.lower() == model["model"].lower() for model in supported_models):
5555
self.model = EMBEDDING_MODEL_TYPE(
56-
model_name, cache_dir, threads, providers=providers, **kwargs
56+
model_name, cache_dir, threads=threads, providers=providers, **kwargs
5757
)
5858
return
5959

fastembed/image/image_embedding_base.py

+5
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ def embed(
3131
3232
Args:
3333
images - The list of image paths to preprocess and embed.
34+
batch_size: Batch size for encoding
35+
parallel:
36+
If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
37+
If 0, use all available cores.
38+
If None, don't use data-parallel processing, use default onnxruntime threading instead.
3439
**kwargs: Additional keyword argument to pass to the embed method.
3540
3641
Yields:

fastembed/image/onnx_embedding.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def embed(
106106
images=images,
107107
batch_size=batch_size,
108108
parallel=parallel,
109+
**kwargs,
109110
)
110111

111112
@classmethod
@@ -126,9 +127,5 @@ def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.nd
126127

127128

128129
class OnnxImageEmbeddingWorker(ImageEmbeddingWorker):
129-
def init_embedding(
130-
self,
131-
model_name: str,
132-
cache_dir: str,
133-
) -> OnnxImageEmbedding:
134-
return OnnxImageEmbedding(model_name=model_name, cache_dir=cache_dir, threads=1)
130+
def init_embedding(self, model_name: str, cache_dir: str, **kwargs) -> OnnxImageEmbedding:
131+
return OnnxImageEmbedding(model_name=model_name, cache_dir=cache_dir, threads=1, **kwargs)

fastembed/image/onnx_image_model.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,7 @@ def onnx_embed(self, images: List[PathInput], **kwargs) -> OnnxOutputContext:
5959
onnx_input = self._preprocess_onnx_input(onnx_input)
6060
model_output = self.model.run(None, onnx_input)
6161
embeddings = model_output[0].reshape(len(images), -1)
62-
return OnnxOutputContext(
63-
model_output=embeddings
64-
)
62+
return OnnxOutputContext(model_output=embeddings)
6563

6664
def _embed_images(
6765
self,
@@ -70,6 +68,7 @@ def _embed_images(
7068
images: ImageInput,
7169
batch_size: int = 256,
7270
parallel: Optional[int] = None,
71+
**kwargs,
7372
) -> Iterable[T]:
7473
is_small = False
7574

@@ -89,10 +88,7 @@ def _embed_images(
8988
yield from self._post_process_onnx_output(self.onnx_embed(batch))
9089
else:
9190
start_method = "forkserver" if "forkserver" in get_all_start_methods() else "spawn"
92-
params = {
93-
"model_name": model_name,
94-
"cache_dir": cache_dir,
95-
}
91+
params = {"model_name": model_name, "cache_dir": cache_dir, **kwargs}
9692
pool = ParallelWorkerPool(
9793
parallel, self._get_worker_class(), start_method=start_method
9894
)

fastembed/late_interaction/colbert.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def embed(
171171
documents=documents,
172172
batch_size=batch_size,
173173
parallel=parallel,
174+
**kwargs,
174175
)
175176

176177
def query_embed(self, query: Union[str, List[str]], **kwargs) -> np.ndarray:
@@ -188,9 +189,5 @@ def _get_worker_class(cls) -> Type[TextEmbeddingWorker]:
188189

189190

190191
class ColbertEmbeddingWorker(TextEmbeddingWorker):
191-
def init_embedding(
192-
self,
193-
model_name: str,
194-
cache_dir: str,
195-
) -> Colbert:
196-
return Colbert(model_name=model_name, cache_dir=cache_dir, threads=1)
192+
def init_embedding(self, model_name: str, cache_dir: str, **kwargs) -> Colbert:
193+
return Colbert(model_name=model_name, cache_dir=cache_dir, threads=1, **kwargs)

0 commit comments

Comments (0)