diff --git a/.gitignore b/.gitignore index 5c21aeab7..a33086e65 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ core/cat/plugins/* # tests plugin folder plugin_folder +# cache embedder +core/local_cache/* + diff --git a/core/cat/factory/custom_embedder.py b/core/cat/factory/custom_embedder.py index 21da5e599..237e58903 100644 --- a/core/cat/factory/custom_embedder.py +++ b/core/cat/factory/custom_embedder.py @@ -65,22 +65,3 @@ def embed_query(self, text: str) -> List[float]: ret.raise_for_status() return ret.json()['data'][0]['embedding'] -class CustomFastembedEmbeddings(Embeddings): - """Use Fastembed for embedding. - """ - def __init__(self, url, model,max_length) -> None: - self.url = url - output = httpx.post(f"{url}/embeddings", json={"model": model, "max_length": max_length}, follow_redirects=True, timeout=None) - output.raise_for_status() - - - def embed_documents(self, texts: List[str]): - ret = httpx.post(f"{self.url}/embeddings/document", json={"document": texts}, timeout=None) - ret.raise_for_status() - return ret.json() - - def embed_query(self, text: str) -> List[float]: - ret = httpx.post(f"{self.url}/embeddings/prompt", json={"prompt": text}, timeout=None) - ret.raise_for_status() - return ret.json() - \ No newline at end of file diff --git a/core/cat/factory/embedder.py b/core/cat/factory/embedder.py index d1324befe..1ac1da83f 100644 --- a/core/cat/factory/embedder.py +++ b/core/cat/factory/embedder.py @@ -1,8 +1,8 @@ from typing import Type import langchain from pydantic import BaseModel, ConfigDict - -from cat.factory.custom_embedder import CustomFastembedEmbeddings, DumbEmbedder, CustomOpenAIEmbeddings +from langchain.embeddings.fastembed import FastEmbedEmbeddings +from cat.factory.custom_embedder import DumbEmbedder, CustomOpenAIEmbeddings # Base class to manage LLM configuration. 
@@ -107,19 +107,20 @@ class EmbedderCohereConfig(EmbedderSettings): ) -class EmbedderFastEmbedConfig(EmbedderSettings): - url: str - model: str = "intfloat/multilingual-e5-large" - max_length: int = 512 - - _pyclass: Type = CustomFastembedEmbeddings +class EmbedderQdrantFastEmbedConfig(EmbedderSettings): + model_name: str = "BAAI/bge-base-en" + max_length: int = 512 # Unknown behavior for values > 512. + doc_embed_type: str = "passage" # as suggested in the fastembed documentation, "passage" is the best option for documents. + + _pyclass: Type = FastEmbedEmbeddings model_config = ConfigDict( json_schema_extra = { - "humanReadableName": "Fast Embedder", - "description": "Configuration for Fast embeddings", + "humanReadableName": "Qdrant FastEmbed (Local)", + "description": "Configuration for Qdrant FastEmbed", } ) + SUPPORTED_EMDEDDING_MODELS = [ @@ -129,7 +130,7 @@ class EmbedderFastEmbedConfig(EmbedderSettings): EmbedderOpenAIConfig, EmbedderAzureOpenAIConfig, EmbedderCohereConfig, - EmbedderFastEmbedConfig + EmbedderQdrantFastEmbedConfig ] diff --git a/core/pyproject.toml b/core/pyproject.toml index e895a9295..f84e4c5d4 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "pandas==1.5.3", "scikit-learn==1.2.1", "qdrant_client==1.6.6", - "langchain==0.0.315", + "langchain==0.0.336", "openai==0.27.5", "cohere==4.0.4", "huggingface-hub==0.13.2", @@ -37,7 +37,8 @@ dependencies = [ "perflint", "pylint-actions", "pytest", - "httpx" + "httpx", + "fastembed==0.1.1" ] [tool.coverage.run]