Skip to content

Commit 58ee7cc

Browse files
authored
fix: fix thenlper, update warnings (#486)
1 parent 27eeb39 commit 58ee7cc

File tree

4 files changed

+22
-26
lines changed

4 files changed

+22
-26
lines changed

fastembed/text/onnx_embedding.py

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -93,18 +93,6 @@
9393
),
9494
model_file="model_optimized.onnx",
9595
),
96-
DenseModelDescription(
97-
model="thenlper/gte-large",
98-
dim=1024,
99-
description=(
100-
"Text embeddings, Unimodal (text), English, 512 input tokens truncation, "
101-
"Prefixes for queries/documents: not necessary, 2023 year."
102-
),
103-
license="mit",
104-
size_in_GB=1.20,
105-
sources=ModelSource(hf="qdrant/gte-large-onnx"),
106-
model_file="model.onnx",
107-
),
10896
DenseModelDescription(
10997
model="mixedbread-ai/mxbai-embed-large-v1",
11098
dim=1024,
@@ -314,6 +302,7 @@ def _preprocess_onnx_input(
314302

315303
def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[NumpyArray]:
316304
embeddings = output.model_output
305+
317306
if embeddings.ndim == 3: # (batch_size, seq_len, embedding_dim)
318307
processed_embeddings = embeddings[:, 0]
319308
elif embeddings.ndim == 2: # (batch_size, embedding_dim)

fastembed/text/pooled_normalized_embedding.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,18 @@
109109
sources=ModelSource(hf="thenlper/gte-base"),
110110
model_file="onnx/model.onnx",
111111
),
112+
DenseModelDescription(
113+
model="thenlper/gte-large",
114+
dim=1024,
115+
description=(
116+
"Text embeddings, Unimodal (text), English, 512 input tokens truncation, "
117+
"Prefixes for queries/documents: not necessary, 2023 year."
118+
),
119+
license="mit",
120+
size_in_GB=1.20,
121+
sources=ModelSource(hf="qdrant/gte-large-onnx"),
122+
model_file="model.onnx",
123+
),
112124
]
113125

114126

fastembed/text/text_embedding.py

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -90,29 +90,24 @@ def __init__(
9090
super().__init__(model_name, cache_dir, threads, **kwargs)
9191
if model_name == "nomic-ai/nomic-embed-text-v1.5-Q":
9292
warnings.warn(
93-
"The model 'nomic-ai/nomic-embed-text-v1.5-Q' has been updated on HuggingFace. "
94-
"Please review the latest documentation and release notes to ensure compatibility with your workflow. ",
95-
UserWarning,
96-
stacklevel=2,
97-
)
98-
if model_name == "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2":
99-
warnings.warn(
100-
"The model 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' has been updated to "
101-
"include a mean pooling layer. Please ensure your usage aligns with the new functionality. "
102-
"Support for the previous version without mean pooling will be removed as of version 0.5.2.",
93+
"The model 'nomic-ai/nomic-embed-text-v1.5-Q' has been updated on HuggingFace. Please review "
94+
"the latest documentation on HF and release notes to ensure compatibility with your workflow. ",
10395
UserWarning,
10496
stacklevel=2,
10597
)
10698
if model_name in {
107-
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
99+
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
100+
"thenlper/gte-large",
108101
"intfloat/multilingual-e5-large",
102+
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
109103
}:
110104
warnings.warn(
111-
f"{model_name} has been updated as of fastembed 0.5.2, outputs are now average pooled.",
105+
f"The model {model_name} now uses mean pooling instead of CLS embedding. "
106+
f"In order to preserve the previous behaviour, consider either pinning fastembed version to 0.5.1 or "
107+
"using `add_custom_model` functionality.",
112108
UserWarning,
113109
stacklevel=2,
114110
)
115-
116111
for EMBEDDING_MODEL_TYPE in self.EMBEDDINGS_REGISTRY:
117112
supported_models = EMBEDDING_MODEL_TYPE._list_supported_models()
118113
if any(model_name.lower() == model.model.lower() for model in supported_models):

tests/test_text_onnx_embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@
5252
[0.0802303, 0.3700881, -4.3053818, 0.4431803, -0.271572]
5353
),
5454
"thenlper/gte-large": np.array(
55-
[-0.01920587, 0.00113156, -0.00708992, -0.00632304, -0.04025577]
55+
[-0.00986551, -0.00018734, 0.00605892, -0.03289612, -0.0387564],
5656
),
5757
"mixedbread-ai/mxbai-embed-large-v1": np.array(
5858
[0.02295546, 0.03196154, 0.016512, -0.04031524, -0.0219634]

0 commit comments

Comments (0)