1
1
import inspect
2
2
from itertools import islice
3
- from typing import Any , AsyncGenerator , ClassVar , Dict , Generator , List , Optional , Set , Union
3
+ from typing import Any , AsyncGenerator , ClassVar , Dict , Generator , List , Optional , Set , Tuple , Union
4
4
5
5
import numpy as np
6
6
import qdrant_client
18
18
from .converters import (
19
19
DENSE_VECTORS_NAME ,
20
20
SPARSE_VECTORS_NAME ,
21
+ QdrantPoint ,
21
22
convert_haystack_documents_to_qdrant_points ,
22
23
convert_id ,
23
24
convert_qdrant_point_to_haystack_document ,
@@ -34,7 +35,7 @@ class QdrantStoreError(DocumentStoreError):
34
35
FilterType = Dict [str , Union [Dict [str , Any ], List [Any ], str , int , float , bool ]]
35
36
36
37
37
- def get_batches_from_generator (iterable , n ) :
38
+ def get_batches_from_generator (iterable : List , n : int ) -> Generator :
38
39
"""
39
40
Batch elements of an iterable into fixed-length chunks or blocks.
40
41
"""
@@ -127,10 +128,10 @@ def __init__(
127
128
write_batch_size : int = 100 ,
128
129
scroll_size : int = 10_000 ,
129
130
payload_fields_to_index : Optional [List [dict ]] = None ,
130
- ):
131
+ ) -> None :
131
132
"""
132
133
:param location:
133
- If `memory` - use in-memory Qdrant instance.
134
+ If `":memory:"` - use in-memory Qdrant instance.
134
135
If `str` - use it as a URL parameter.
135
136
If `None` - use default values for host and port.
136
137
:param url:
@@ -164,7 +165,7 @@ def __init__(
164
165
Dimension of the embeddings.
165
166
:param on_disk:
166
167
Whether to store the collection on disk.
167
- :param use_sparse_embedding :
168
+ :param use_sparse_embeddings :
168
169
If set to `True`, enables support for sparse embeddings.
169
170
:param sparse_idf:
170
171
If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
@@ -232,7 +233,6 @@ def __init__(
232
233
self .path = path
233
234
self .force_disable_check_same_thread = force_disable_check_same_thread
234
235
self .metadata = metadata or {}
235
- self .api_key = api_key
236
236
237
237
# Store the Qdrant collection specific attributes
238
238
self .shard_number = shard_number
@@ -258,9 +258,10 @@ def __init__(
258
258
self .write_batch_size = write_batch_size
259
259
self .scroll_size = scroll_size
260
260
261
- def _initialize_client (self ):
261
+ def _initialize_client (self ) -> None :
262
262
if self ._client is None :
263
263
client_params = self ._prepare_client_params ()
264
+ # This step adds the api-key and User-Agent to metadata
264
265
self ._client = qdrant_client .QdrantClient (** client_params )
265
266
# Make sure the collection is properly set up
266
267
self ._set_up_collection (
@@ -274,7 +275,7 @@ def _initialize_client(self):
274
275
self .payload_fields_to_index ,
275
276
)
276
277
277
- async def _initialize_async_client (self ):
278
+ async def _initialize_async_client (self ) -> None :
278
279
"""
279
280
Initializes the asynchronous Qdrant client if necessary.
280
281
"""
@@ -628,8 +629,6 @@ def get_documents_by_id(
628
629
629
630
:param ids:
630
631
A list of document IDs to retrieve.
631
- :param index:
632
- The name of the index to retrieve documents from.
633
632
:returns:
634
633
A list of documents.
635
634
"""
@@ -661,8 +660,6 @@ async def get_documents_by_id_async(
661
660
662
661
:param ids:
663
662
A list of document IDs to retrieve.
664
- :param index:
665
- The name of the index to retrieve documents from.
666
663
:returns:
667
664
A list of documents.
668
665
"""
@@ -1210,7 +1207,7 @@ def get_distance(self, similarity: str) -> rest.Distance:
1210
1207
)
1211
1208
raise QdrantStoreError (msg ) from ke
1212
1209
1213
- def _create_payload_index (self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None ):
1210
+ def _create_payload_index (self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None ) -> None :
1214
1211
"""
1215
1212
Create payload index for the collection if payload_fields_to_index is provided
1216
1213
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1229,7 +1226,7 @@ def _create_payload_index(self, collection_name: str, payload_fields_to_index: O
1229
1226
1230
1227
async def _create_payload_index_async (
1231
1228
self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None
1232
- ):
1229
+ ) -> None :
1233
1230
"""
1234
1231
Asynchronously create payload index for the collection if payload_fields_to_index is provided
1235
1232
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1257,7 +1254,7 @@ def _set_up_collection(
1257
1254
sparse_idf : bool ,
1258
1255
on_disk : bool = False ,
1259
1256
payload_fields_to_index : Optional [List [dict ]] = None ,
1260
- ):
1257
+ ) -> None :
1261
1258
"""
1262
1259
Sets up the Qdrant collection with the specified parameters.
1263
1260
:param collection_name:
@@ -1313,7 +1310,7 @@ async def _set_up_collection_async(
1313
1310
sparse_idf : bool ,
1314
1311
on_disk : bool = False ,
1315
1312
payload_fields_to_index : Optional [List [dict ]] = None ,
1316
- ):
1313
+ ) -> None :
1317
1314
"""
1318
1315
Asynchronously sets up the Qdrant collection with the specified parameters.
1319
1316
:param collection_name:
@@ -1367,7 +1364,7 @@ def recreate_collection(
1367
1364
on_disk : Optional [bool ] = None ,
1368
1365
use_sparse_embeddings : Optional [bool ] = None ,
1369
1366
sparse_idf : bool = False ,
1370
- ):
1367
+ ) -> None :
1371
1368
"""
1372
1369
Recreates the Qdrant collection with the specified parameters.
1373
1370
@@ -1410,7 +1407,7 @@ async def recreate_collection_async(
1410
1407
on_disk : Optional [bool ] = None ,
1411
1408
use_sparse_embeddings : Optional [bool ] = None ,
1412
1409
sparse_idf : bool = False ,
1413
- ):
1410
+ ) -> None :
1414
1411
"""
1415
1412
Asynchronously recreates the Qdrant collection with the specified parameters.
1416
1413
@@ -1449,7 +1446,7 @@ def _handle_duplicate_documents(
1449
1446
self ,
1450
1447
documents : List [Document ],
1451
1448
policy : DuplicatePolicy = None ,
1452
- ):
1449
+ ) -> List [ Document ] :
1453
1450
"""
1454
1451
Checks whether any of the passed documents already exists in the chosen index and returns a list of
1455
1452
documents that are not in the index yet.
@@ -1476,7 +1473,7 @@ async def _handle_duplicate_documents_async(
1476
1473
self ,
1477
1474
documents : List [Document ],
1478
1475
policy : DuplicatePolicy = None ,
1479
- ):
1476
+ ) -> List [ Document ] :
1480
1477
"""
1481
1478
Asynchronously checks whether any of the passed documents already exists
1482
1479
in the chosen index and returns a list of
@@ -1521,7 +1518,7 @@ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]
1521
1518
1522
1519
return _documents
1523
1520
1524
- def _prepare_collection_params (self ):
1521
+ def _prepare_collection_params (self ) -> Dict [ str , Any ] :
1525
1522
"""
1526
1523
Prepares the common parameters for collection creation.
1527
1524
"""
@@ -1537,7 +1534,7 @@ def _prepare_collection_params(self):
1537
1534
"init_from" : self .init_from ,
1538
1535
}
1539
1536
1540
- def _prepare_client_params (self ):
1537
+ def _prepare_client_params (self ) -> Dict [ str , Any ] :
1541
1538
"""
1542
1539
Prepares the common parameters for client initialization.
1543
1540
@@ -1554,7 +1551,10 @@ def _prepare_client_params(self):
1554
1551
"timeout" : self .timeout ,
1555
1552
"host" : self .host ,
1556
1553
"path" : self .path ,
1557
- "metadata" : self .metadata ,
1554
+ # NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
1555
+ # instance attribute. For example, the resolved api key is added to metadata by the QdrantClient class
1556
+ # when using a hosted Qdrant service, which means running to_dict() would otherwise expose the api key.
1557
+ "metadata" : {** self .metadata },
1558
1558
"force_disable_check_same_thread" : self .force_disable_check_same_thread ,
1559
1559
}
1560
1560
@@ -1565,7 +1565,7 @@ def _prepare_collection_config(
1565
1565
on_disk : Optional [bool ] = None ,
1566
1566
use_sparse_embeddings : Optional [bool ] = None ,
1567
1567
sparse_idf : bool = False ,
1568
- ):
1568
+ ) -> Tuple [ Dict [ str , rest . VectorParams ], Optional [ Dict [ str , rest . SparseVectorParams ]]] :
1569
1569
"""
1570
1570
Prepares the configuration for creating or recreating a Qdrant collection.
1571
1571
@@ -1595,9 +1595,12 @@ def _prepare_collection_config(
1595
1595
1596
1596
return vectors_config , sparse_vectors_config
1597
1597
1598
- def _validate_filters (self , filters : Optional [Union [Dict [str , Any ], rest .Filter ]] = None ):
1598
+ def _validate_filters (self , filters : Optional [Union [Dict [str , Any ], rest .Filter ]] = None ) -> None :
1599
1599
"""
1600
1600
Validates the filters provided for querying.
1601
+
1602
+ :param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
1603
+ :raises ValueError: If the filters are not in the correct format or syntax.
1601
1604
"""
1602
1605
if filters and not isinstance (filters , dict ) and not isinstance (filters , rest .Filter ):
1603
1606
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
@@ -1607,7 +1610,7 @@ def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]
1607
1610
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
1608
1611
raise ValueError (msg )
1609
1612
1610
- def _process_query_point_results (self , results , scale_score : bool = False ):
1613
+ def _process_query_point_results (self , results : List [ QdrantPoint ] , scale_score : bool = False ) -> List [ Document ] :
1611
1614
"""
1612
1615
Processes query results from Qdrant.
1613
1616
"""
@@ -1627,7 +1630,7 @@ def _process_query_point_results(self, results, scale_score: bool = False):
1627
1630
1628
1631
return documents
1629
1632
1630
- def _process_group_results (self , groups ) :
1633
+ def _process_group_results (self , groups : List [ rest . PointGroup ]) -> List [ Document ] :
1631
1634
"""
1632
1635
Processes grouped query results from Qdrant.
1633
1636
@@ -1647,7 +1650,7 @@ def _validate_collection_compatibility(
1647
1650
collection_info ,
1648
1651
distance ,
1649
1652
embedding_dim : int ,
1650
- ):
1653
+ ) -> None :
1651
1654
"""
1652
1655
Validates that an existing collection is compatible with the current configuration.
1653
1656
"""
0 commit comments