Skip to content

Commit 0cddf57

Browse files
authored
fix: fix Pinecone types + add py.typed (#1993)
1 parent d804dd7 commit 0cddf57

File tree

7 files changed: 31 additions and 36 deletions.

7 files changed

+31
-36
lines changed

.github/workflows/pinecone.yml

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,9 @@ jobs:
5252
- name: Install Hatch
5353
run: pip install --upgrade hatch
5454

55-
# TODO: Once this integration is properly typed, use hatch run test:types
56-
# https://github.com/deepset-ai/haystack-core-integrations/issues/1771
5755
- name: Lint
5856
if: matrix.python-version == '3.9' && runner.os == 'Linux'
59-
run: hatch run fmt-check && hatch run lint:typing
57+
run: hatch run fmt-check && hatch run test:types
6058

6159
- name: Generate docs
6260
if: matrix.python-version == '3.9' && runner.os == 'Linux'

integrations/pinecone/pyproject.toml

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -74,12 +74,14 @@ all = 'pytest {args:tests}'
7474
# with pytest-xdist (https://pytest-xdist.readthedocs.io/en/stable/distribution.html)
7575
cov-retry = 'all -n auto --maxprocesses=2 --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
7676

77-
[tool.hatch.envs.lint]
78-
installer = "uv"
79-
detached = true
80-
dependencies = ["pip", "mypy>=1.0.0", "ruff>=0.0.243", "numpy"]
81-
[tool.hatch.envs.lint.scripts]
82-
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
77+
types = """mypy -p haystack_integrations.components.retrievers.pinecone \
78+
-p haystack_integrations.document_stores.pinecone {args}"""
79+
80+
[tool.mypy]
81+
install_types = true
82+
non_interactive = true
83+
check_untyped_defs = true
84+
disallow_incomplete_defs = true
8385

8486
[tool.hatch.metadata]
8587
allow-direct-references = true
@@ -165,8 +167,4 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
165167

166168
[tool.pytest.ini_options]
167169
minversion = "6.0"
168-
markers = ["unit: unit tests", "integration: integration tests"]
169-
170-
[[tool.mypy.overrides]]
171-
module = ["pinecone.*", "haystack.*", "haystack_integrations.*", "pytest.*", "pytest_asyncio.*"]
172-
ignore_missing_imports = true
170+
markers = ["integration: integration tests"]

integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ def run(
116116
query_embedding: List[float],
117117
filters: Optional[Dict[str, Any]] = None,
118118
top_k: Optional[int] = None,
119-
):
119+
) -> Dict[str, List[Document]]:
120120
"""
121121
Retrieve documents from the `PineconeDocumentStore`, based on their dense embeddings.
122122
@@ -145,7 +145,7 @@ async def run_async(
145145
query_embedding: List[float],
146146
filters: Optional[Dict[str, Any]] = None,
147147
top_k: Optional[int] = None,
148-
):
148+
) -> Dict[str, List[Document]]:
149149
"""
150150
Asynchronously retrieve documents from the `PineconeDocumentStore`, based on their dense embeddings.
151151

integrations/pinecone/src/haystack_integrations/components/retrievers/py.typed

Whitespace-only changes.

integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,15 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
from copy import copy
5-
from typing import Any, Dict, List, Literal, Optional
5+
from typing import Any, Dict, List, Literal, Optional, Union
66

77
from haystack import default_from_dict, default_to_dict, logging
88
from haystack.dataclasses import Document
99
from haystack.document_stores.types import DuplicatePolicy
1010
from haystack.utils import Secret, deserialize_secrets_inplace
1111

1212
from pinecone import Pinecone, PineconeAsyncio, PodSpec, ServerlessSpec
13+
from pinecone.db_data import _Index, _IndexAsyncio
1314

1415
from .filters import _normalize_filters, _validate_filters
1516

@@ -70,8 +71,8 @@ def __init__(
7071
self.dimension = dimension
7172
self.index_name = index
7273

73-
self._index = None
74-
self._async_index = None
74+
self._index: Optional[_Index] = None
75+
self._async_index: Optional[_IndexAsyncio] = None
7576
self._dummy_vector = [-10.0] * self.dimension
7677

7778
def _initialize_index(self):
@@ -137,7 +138,7 @@ async def _initialize_async_index(self):
137138
await async_client.close()
138139

139140
@staticmethod
140-
def _convert_dict_spec_to_pinecone_object(spec: Dict[str, Any]):
141+
def _convert_dict_spec_to_pinecone_object(spec: Dict[str, Any]) -> Union[ServerlessSpec, PodSpec]:
141142
"""Convert the spec dictionary to a Pinecone spec object"""
142143

143144
if "serverless" in spec:
@@ -447,12 +448,12 @@ def _convert_query_result_to_documents(self, query_result: Dict[str, Any]) -> Li
447448
return documents
448449

449450
@staticmethod
450-
def _discard_invalid_meta(document: Document):
451+
def _discard_invalid_meta(document: Document) -> None:
451452
"""
452453
Remove metadata fields with unsupported types from the document.
453454
"""
454455

455-
def valid_type(value: Any):
456+
def valid_type(value: Any) -> bool:
456457
return isinstance(value, METADATA_SUPPORTED_TYPES) or (
457458
isinstance(value, list) and all(isinstance(i, str) for i in value)
458459
)
@@ -475,8 +476,6 @@ def valid_type(value: Any):
475476

476477
document.meta = new_meta
477478

478-
return document
479-
480479
def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> List[Dict[str, Any]]:
481480
documents_for_pinecone = []
482481
for document in documents:
@@ -491,7 +490,7 @@ def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> Li
491490
if document.meta:
492491
self._discard_invalid_meta(document)
493492

494-
doc_for_pinecone = {"id": document.id, "values": embedding, "metadata": dict(document.meta)}
493+
doc_for_pinecone: Dict[str, Any] = {"id": document.id, "values": embedding, "metadata": dict(document.meta)}
495494

496495
# we save content as metadata
497496
if document.content is not None:
@@ -506,10 +505,10 @@ def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> Li
506505
)
507506
if hasattr(document, "sparse_embedding") and document.sparse_embedding is not None:
508507
logger.warning(
509-
"Document %s has the `sparse_embedding` field set,"
508+
"Document {document_id} has the `sparse_embedding` field set,"
510509
"but storing sparse embeddings in Pinecone is not currently supported."
511510
"The `sparse_embedding` field will be ignored.",
512-
document.id,
511+
document_id=document.id,
513512
)
514513

515514
documents_for_pinecone.append(doc_for_pinecone)

integrations/pinecone/src/haystack_integrations/document_stores/py.typed

Whitespace-only changes.

integrations/pinecone/tests/test_document_store.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -158,13 +158,13 @@ def test_discard_invalid_meta_invalid():
158158
],
159159
},
160160
)
161-
pinecone_doc = PineconeDocumentStore._discard_invalid_meta(invalid_metadata_doc)
161+
PineconeDocumentStore._discard_invalid_meta(invalid_metadata_doc)
162162

163-
assert pinecone_doc.meta["source_id"] == "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0"
164-
assert pinecone_doc.meta["page_number"] == 1
165-
assert pinecone_doc.meta["split_id"] == 0
166-
assert pinecone_doc.meta["split_idx_start"] == 0
167-
assert "_split_overlap" not in pinecone_doc.meta
163+
assert invalid_metadata_doc.meta["source_id"] == "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0"
164+
assert invalid_metadata_doc.meta["page_number"] == 1
165+
assert invalid_metadata_doc.meta["split_id"] == 0
166+
assert invalid_metadata_doc.meta["split_idx_start"] == 0
167+
assert "_split_overlap" not in invalid_metadata_doc.meta
168168

169169

170170
def test_discard_invalid_meta_valid():
@@ -175,10 +175,10 @@ def test_discard_invalid_meta_valid():
175175
"page_number": 1,
176176
},
177177
)
178-
pinecone_doc = PineconeDocumentStore._discard_invalid_meta(valid_metadata_doc)
178+
PineconeDocumentStore._discard_invalid_meta(valid_metadata_doc)
179179

180-
assert pinecone_doc.meta["source_id"] == "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0"
181-
assert pinecone_doc.meta["page_number"] == 1
180+
assert valid_metadata_doc.meta["source_id"] == "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0"
181+
assert valid_metadata_doc.meta["page_number"] == 1
182182

183183

184184
def test_convert_meta_to_int():

0 commit comments

Comments
 (0)