Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions nemoguardrails/embeddings/providers/fastembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
from .base import EmbeddingModel


def _is_missing_onnx_model_error(ex: Exception) -> bool:
message = str(ex)
return "model.onnx" in message and (
"Could not find model.onnx in" in message or "NO_SUCHFILE" in message or "File doesn't exist" in message
)


def get_executor():
from . import embeddings_executor

Expand Down Expand Up @@ -51,14 +58,16 @@ def __init__(self, embedding_model: str, **kwargs):

try:
self.model = Embedding(embedding_model, **kwargs)
except ValueError as ex:
except Exception as ex:
# Sometimes the cached model in the temporary folder gets removed,
# but the folder still exists, which causes an error. In this case,
# we fall back to an explicit cache directory.
if "Could not find model.onnx in" in str(ex):
self.model = Embedding(embedding_model, cache_dir=".cache", **kwargs)
if _is_missing_onnx_model_error(ex):
fallback_kwargs = dict(kwargs)
fallback_kwargs["cache_dir"] = ".cache"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this an absolute path? For example:

fallback_kwargs["cache_dir"] = str(Path.home() / ".cache" / "fastembed")

My concern is that if we use a relative path, it will fail in Docker containers, since the cwd is usually `/` or read-only.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or alternatively, log a warning when overriding the value

self.model = Embedding(embedding_model, **fallback_kwargs)
else:
raise ex
raise

# Get the embedding dimension of the model
self.embedding_size = len(list(self.model.embed("test"))[0].tolist())
Expand Down
42 changes: 42 additions & 0 deletions tests/test_embeddings_fastembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,54 @@
import os

import pytest
from onnxruntime.capi.onnxruntime_pybind11_state import NoSuchFile

from nemoguardrails.embeddings.providers.fastembed import FastEmbedEmbeddingModel
Comment on lines 18 to 21

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Top-level import of onnxruntime.capi.onnxruntime_pybind11_state.NoSuchFile makes the entire test module unimportable in any environment that lacks onnxruntime. Since _is_missing_onnx_model_error only inspects the string message (not the exception type), there's no need to use the real ONNXRuntime class — a plain RuntimeError with the same message text exercises exactly the same code path without the hard dependency.

Suggested change
import pytest
from onnxruntime.capi.onnxruntime_pybind11_state import NoSuchFile
from nemoguardrails.embeddings.providers.fastembed import FastEmbedEmbeddingModel
import pytest
from nemoguardrails.embeddings.providers.fastembed import FastEmbedEmbeddingModel
Prompt To Fix With AI
This is a comment left during a code review.
Path: tests/test_embeddings_fastembed.py
Line: 18-21

Comment:
Top-level import of `onnxruntime.capi.onnxruntime_pybind11_state.NoSuchFile` makes the entire test module unimportable in any environment that lacks `onnxruntime`. Since `_is_missing_onnx_model_error` only inspects the string message (not the exception type), there's no need to use the real ONNXRuntime class — a plain `RuntimeError` with the same message text exercises exactly the same code path without the hard dependency.

```suggestion
import pytest

from nemoguardrails.embeddings.providers.fastembed import FastEmbedEmbeddingModel
```

How can I resolve this? If you propose a fix, please make it concise.


LIVE_TEST_MODE = os.environ.get("LIVE_TEST")


class _FakeEmbeddingVector:
def tolist(self):
return [0.1, 0.2]


def test_recovers_from_missing_onnxruntime_model_cache(monkeypatch):
calls = []

class FakeTextEmbedding:
def __init__(self, model_name, **kwargs):
calls.append((model_name, kwargs))
if len(calls) == 1:
raise NoSuchFile(
"[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/model.onnx failed. File doesn't exist"
)
Comment on lines +37 to +40

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 After removing the top-level NoSuchFile import, the test can use a plain RuntimeError with the identical message. The production helper only string-matches on the message, so this change preserves full test coverage while eliminating the onnxruntime hard dependency.

Suggested change
if len(calls) == 1:
raise NoSuchFile(
"[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/model.onnx failed. File doesn't exist"
)
if len(calls) == 1:
raise RuntimeError(
"[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/model.onnx failed. File doesn't exist"
)
Prompt To Fix With AI
This is a comment left during a code review.
Path: tests/test_embeddings_fastembed.py
Line: 37-40

Comment:
After removing the top-level `NoSuchFile` import, the test can use a plain `RuntimeError` with the identical message. The production helper only string-matches on the message, so this change preserves full test coverage while eliminating the `onnxruntime` hard dependency.

```suggestion
            if len(calls) == 1:
                raise RuntimeError(
                    "[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/model.onnx failed. File doesn't exist"
                )
```

How can I resolve this? If you propose a fix, please make it concise.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!


def embed(self, documents):
return [_FakeEmbeddingVector()]

monkeypatch.setattr("fastembed.TextEmbedding", FakeTextEmbedding)

model = FastEmbedEmbeddingModel("all-MiniLM-L6-v2")

assert model.embedding_size == 2
assert calls == [
("sentence-transformers/all-MiniLM-L6-v2", {}),
("sentence-transformers/all-MiniLM-L6-v2", {"cache_dir": ".cache"}),
]


def test_reraises_unrelated_fastembed_errors(monkeypatch):
    """A constructor error unrelated to a missing ``model.onnx`` must propagate."""

    class _AlwaysFailingEmbedding:
        def __init__(self, model_name, **kwargs):
            raise ValueError("unrelated failure")

    # Swap the fastembed class for one whose constructor always raises.
    monkeypatch.setattr("fastembed.TextEmbedding", _AlwaysFailingEmbedding)

    with pytest.raises(ValueError, match="unrelated failure"):
        FastEmbedEmbeddingModel("all-MiniLM-L6-v2")


@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
def test_sync_embeddings():
model = FastEmbedEmbeddingModel("all-MiniLM-L6-v2")
Expand Down
Loading