Skip to content

Commit d1864a7

Browse files
authored
fix: fix Fastembed types + add py.typed (#1954)
1 parent 5b094f9 commit d1864a7

File tree

10 files changed

+20
-35
lines changed

10 files changed

+20
-35
lines changed

.github/workflows/fastembed.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,9 @@ jobs:
3333

3434
- name: Install Hatch
3535
run: pip install --upgrade hatch
36-
37-
# TODO: Once this integration is properly typed, use hatch run test:types
38-
# https://github.com/deepset-ai/haystack-core-integrations/issues/1771
3936
- name: Lint
4037
if: matrix.python-version == '3.9' && runner.os == 'Linux'
41-
run: hatch run fmt-check && hatch run lint:typing
38+
run: hatch run fmt-check && hatch run test:types
4239

4340
- name: Generate docs
4441
if: matrix.python-version == '3.9' && runner.os == 'Linux'

integrations/fastembed/pyproject.toml

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,14 @@ unit = 'pytest -m "not integration" {args:tests}'
6565
integration = 'pytest -m "integration" {args:tests}'
6666
all = 'pytest {args:tests}'
6767
cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
68-
types = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
68+
types = """mypy -p haystack_integrations.components.embedders.fastembed \
69+
-p haystack_integrations.components.rankers.fastembed {args}"""
6970

70-
# TODO: remove lint environment once this integration is properly typed
71-
# test environment should be used instead
72-
# https://github.com/deepset-ai/haystack-core-integrations/issues/1771
73-
[tool.hatch.envs.lint]
74-
installer = "uv"
75-
detached = true
76-
dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243", "numpy"]
77-
[tool.hatch.envs.lint.scripts]
78-
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
71+
[tool.mypy]
72+
install_types = true
73+
non_interactive = true
74+
check_untyped_defs = true
75+
disallow_incomplete_defs = true
7976

8077
[tool.black]
8178
target-version = ["py38"]
@@ -161,14 +158,5 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
161158

162159
[tool.pytest.ini_options]
163160
minversion = "6.0"
164-
markers = ["unit: unit tests", "integration: integration tests"]
165-
166-
[[tool.mypy.overrides]]
167-
module = [
168-
"haystack.*",
169-
"haystack_integrations.*",
170-
"fastembed.*",
171-
"pytest.*",
172-
"numpy.*",
173-
]
174-
ignore_missing_imports = true
161+
markers = ["integration: integration tests"]
162+

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def get_embedding_backend(
2020
cache_dir: Optional[str] = None,
2121
threads: Optional[int] = None,
2222
local_files_only: bool = False,
23-
):
23+
) -> "_FastembedEmbeddingBackend":
2424
embedding_backend_id = f"{model_name}{cache_dir}{threads}"
2525

2626
if embedding_backend_id in _FastembedEmbeddingBackendFactory._instances:
@@ -49,7 +49,7 @@ def __init__(
4949
model_name=model_name, cache_dir=cache_dir, threads=threads, local_files_only=local_files_only
5050
)
5151

52-
def embed(self, data: List[str], progress_bar=True, **kwargs) -> List[List[float]]:
52+
def embed(self, data: List[str], progress_bar: bool = True, **kwargs: Any) -> List[List[float]]:
5353
# the embed method returns an Iterable[np.ndarray], so we convert it to a list of lists
5454
embeddings = []
5555
embeddings_iterable = self.model.embed(data, **kwargs)
@@ -74,7 +74,7 @@ def get_embedding_backend(
7474
threads: Optional[int] = None,
7575
local_files_only: bool = False,
7676
model_kwargs: Optional[Dict[str, Any]] = None,
77-
):
77+
) -> "_FastembedSparseEmbeddingBackend":
7878
embedding_backend_id = f"{model_name}{cache_dir}{threads}{local_files_only}{model_kwargs}"
7979

8080
if embedding_backend_id in _FastembedSparseEmbeddingBackendFactory._instances:
@@ -114,7 +114,7 @@ def __init__(
114114
**model_kwargs,
115115
)
116116

117-
def embed(self, data: List[List[str]], progress_bar=True, **kwargs) -> List[SparseEmbedding]:
117+
def embed(self, data: List[str], progress_bar: bool = True, **kwargs: Any) -> List[SparseEmbedding]:
118118
# The embed method returns an Iterable[SparseEmbedding], so we convert to the Haystack SparseEmbedding type.
119119
# Each SparseEmbedding contains an `indices` key containing a list of int and
120120
# a `values` key containing a list of floats.

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
150150
return texts_to_embed
151151

152152
@component.output_types(documents=List[Document])
153-
def run(self, documents: List[Document]):
153+
def run(self, documents: List[Document]) -> Dict[str, List[Document]]:
154154
"""
155155
Embeds a list of Documents.
156156

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
141141
return texts_to_embed
142142

143143
@component.output_types(documents=List[Document])
144-
def run(self, documents: List[Document]):
144+
def run(self, documents: List[Document]) -> Dict[str, List[Document]]:
145145
"""
146146
Embeds a list of Documents.
147147

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def warm_up(self):
9494
)
9595

9696
@component.output_types(sparse_embedding=SparseEmbedding)
97-
def run(self, text: str):
97+
def run(self, text: str) -> Dict[str, SparseEmbedding]:
9898
"""
9999
Embeds text using the Fastembed model.
100100

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def warm_up(self):
9696
)
9797

9898
@component.output_types(embedding=List[float])
99-
def run(self, text: str):
99+
def run(self, text: str) -> Dict[str, List[float]]:
100100
"""
101101
Embeds text using the Fastembed model.
102102

integrations/fastembed/src/haystack_integrations/components/embedders/py.typed

Whitespace-only changes.

integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(
7676
self.local_files_only = local_files_only
7777
self.meta_fields_to_embed = meta_fields_to_embed or []
7878
self.meta_data_separator = meta_data_separator
79-
self._model = None
79+
self._model: Optional[TextCrossEncoder] = None
8080

8181
def to_dict(self) -> Dict[str, Any]:
8282
"""
@@ -140,7 +140,7 @@ def _prepare_fastembed_input_docs(self, documents: List[Document]) -> List[str]:
140140
return concatenated_input_list
141141

142142
@component.output_types(documents=List[Document])
143-
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None):
143+
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None) -> Dict[str, List[Document]]:
144144
"""
145145
Returns a list of documents ranked by their similarity to the given query, using FastEmbed.
146146

integrations/fastembed/src/haystack_integrations/components/rankers/py.typed

Whitespace-only changes.

0 commit comments

Comments
 (0)