Commit a6841a8

Added DeprecationWarning for Splade model (#331)
* Added DeprecationWarning for Splade model
* Dry and simple
1 parent 62607c2 · commit a6841a8

2 files changed (+10, -19 lines)


fastembed/sparse/sparse_text_embedding.py (+10, -7)

@@ -8,6 +8,7 @@
     SparseTextEmbeddingBase,
 )
 from fastembed.sparse.splade_pp import SpladePP
+import warnings


 class SparseTextEmbedding(SparseTextEmbeddingBase):

@@ -50,13 +51,17 @@ def __init__(
         **kwargs,
     ):
         super().__init__(model_name, cache_dir, threads, **kwargs)
+        if model_name == "prithvida/Splade_PP_en_v1":
+            warnings.warn(
+                "The right spelling is prithivida/Splade_PP_en_v1. "
+                "Support of this name will be removed soon, please fix the model_name",
+                DeprecationWarning,
+            )
+            model_name = "prithivida/Splade_PP_en_v1"

         for EMBEDDING_MODEL_TYPE in self.EMBEDDINGS_REGISTRY:
             supported_models = EMBEDDING_MODEL_TYPE.list_supported_models()
-            if any(
-                model_name.lower() == model["model"].lower()
-                for model in supported_models
-            ):
+            if any(model_name.lower() == model["model"].lower() for model in supported_models):
                 self.model = EMBEDDING_MODEL_TYPE(
                     model_name,
                     cache_dir,

@@ -95,9 +100,7 @@ def embed(
         """
         yield from self.model.embed(documents, batch_size, parallel, **kwargs)

-    def query_embed(
-        self, query: Union[str, Iterable[str]], **kwargs
-    ) -> Iterable[SparseEmbedding]:
+    def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> Iterable[SparseEmbedding]:
         """
         Embeds queries
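For reference, a minimal sketch of the new behaviour from the caller's side. It assumes fastembed at this commit is installed and that instantiating the model is acceptable (it downloads the ONNX weights on first use); the import path follows the file shown above.

    import warnings

    from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding

    # The misspelled name still works, but it now emits a DeprecationWarning
    # and is remapped internally to "prithivida/Splade_PP_en_v1".
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        model = SparseTextEmbedding(model_name="prithvida/Splade_PP_en_v1")

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
    print(caught[0].message)

Callers should switch to prithivida/Splade_PP_en_v1 directly, since support for the misspelled name is slated for removal.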

fastembed/sparse/splade_pp.py (-12)

@@ -1,7 +1,6 @@
 from typing import Any, Dict, Iterable, List, Optional, Sequence, Type, Union

 import numpy as np
-
 from fastembed.common import OnnxProvider
 from fastembed.common.onnx_model import OnnxOutputContext
 from fastembed.common.utils import define_cache_dir

@@ -12,16 +11,6 @@
 from fastembed.text.onnx_text_model import OnnxTextModel, TextEmbeddingWorker

 supported_splade_models = [
-    {
-        "model": "prithvida/Splade_PP_en_v1",
-        "vocab_size": 30522,
-        "description": "Misspelled version of the model. Retained for backward compatibility. Independent Implementation of SPLADE++ Model for English",
-        "size_in_GB": 0.532,
-        "sources": {
-            "hf": "Qdrant/SPLADE_PP_en_v1",
-        },
-        "model_file": "model.onnx",
-    },
     {
         "model": "prithivida/Splade_PP_en_v1",
         "vocab_size": 30522,

@@ -78,7 +67,6 @@ def __init__(
         Raises:
             ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
         """
-
         super().__init__(model_name, cache_dir, threads, **kwargs)

         model_description = self._get_model_description(model_name)
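To confirm the registry side of the change, one can inspect the models that SpladePP still advertises. This is a sketch that assumes SpladePP (the class imported by sparse_text_embedding.py above) is the registry entry that previously served both spellings:

    from fastembed.sparse.splade_pp import SpladePP

    # After this commit only the correctly spelled entry remains in
    # supported_splade_models; the misspelled record is gone from the registry.
    names = [entry["model"] for entry in SpladePP.list_supported_models()]
    assert "prithivida/Splade_PP_en_v1" in names
    assert "prithvida/Splade_PP_en_v1" not in names

The misspelled name now only works through the alias added in sparse_text_embedding.py, not through a registry entry of its own.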

0 commit comments
