9
9
from haystack .document_stores .types import DuplicatePolicy
10
10
from haystack .utils import Secret , deserialize_secrets_inplace
11
11
12
- from .astra_client import AstraClient
12
+ from .astra_client import AstraClient , QueryResponse
13
13
from .errors import AstraDocumentStoreFilterError
14
14
from .filters import _convert_filters
15
15
@@ -150,7 +150,7 @@ def write_documents(
150
150
self ,
151
151
documents : List [Document ],
152
152
policy : DuplicatePolicy = DuplicatePolicy .NONE ,
153
- ):
153
+ ) -> int :
154
154
"""
155
155
Indexes documents for later queries.
156
156
@@ -176,7 +176,7 @@ def write_documents(
176
176
177
177
batch_size = MAX_BATCH_SIZE
178
178
179
- def _convert_input_document (document : Union [dict , Document ]):
179
+ def _convert_input_document (document : Union [dict , Document ]) -> Dict [ str , Any ] :
180
180
if isinstance (document , Document ):
181
181
document_dict = document .to_dict (flatten = False )
182
182
elif isinstance (document , dict ):
@@ -217,7 +217,7 @@ def _convert_input_document(document: Union[dict, Document]):
217
217
documents_to_write = [_convert_input_document (doc ) for doc in documents ]
218
218
219
219
duplicate_documents = []
220
- new_documents : List [Document ] = []
220
+ new_documents : List [Dict ] = []
221
221
i = 0
222
222
while i < len (documents_to_write ):
223
223
doc = documents_to_write [i ]
@@ -238,7 +238,7 @@ def _convert_input_document(document: Union[dict, Document]):
238
238
if policy == DuplicatePolicy .SKIP :
239
239
if len (new_documents ) > 0 :
240
240
for batch in _batches (new_documents , batch_size ):
241
- inserted_ids = self .index .insert (batch ) # type: ignore
241
+ inserted_ids = self .index .insert (batch )
242
242
insertion_counter += len (inserted_ids )
243
243
logger .info (f"write_documents inserted documents with id { inserted_ids } " )
244
244
else :
@@ -247,7 +247,7 @@ def _convert_input_document(document: Union[dict, Document]):
247
247
elif policy == DuplicatePolicy .OVERWRITE :
248
248
if len (new_documents ) > 0 :
249
249
for batch in _batches (new_documents , batch_size ):
250
- inserted_ids = self .index .insert (batch ) # type: ignore
250
+ inserted_ids = self .index .insert (batch )
251
251
insertion_counter += len (inserted_ids )
252
252
logger .info (f"write_documents inserted documents with id { inserted_ids } " )
253
253
else :
@@ -256,7 +256,7 @@ def _convert_input_document(document: Union[dict, Document]):
256
256
if len (duplicate_documents ) > 0 :
257
257
updated_ids = []
258
258
for duplicate_doc in duplicate_documents :
259
- updated = self .index .update_document (duplicate_doc , "_id" ) # type: ignore
259
+ updated = self .index .update_document (duplicate_doc , "_id" )
260
260
if updated :
261
261
updated_ids .append (duplicate_doc ["_id" ])
262
262
insertion_counter = insertion_counter + len (updated_ids )
@@ -267,7 +267,7 @@ def _convert_input_document(document: Union[dict, Document]):
267
267
elif policy == DuplicatePolicy .FAIL :
268
268
if len (new_documents ) > 0 :
269
269
for batch in _batches (new_documents , batch_size ):
270
- inserted_ids = self .index .insert (batch ) # type: ignore
270
+ inserted_ids = self .index .insert (batch )
271
271
insertion_counter = insertion_counter + len (inserted_ids )
272
272
logger .info (f"write_documents inserted documents with id { inserted_ids } " )
273
273
else :
@@ -326,15 +326,18 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
326
326
return documents
327
327
328
328
@staticmethod
329
- def _get_result_to_documents (results ) -> List [Document ]:
329
+ def _get_result_to_documents (results : QueryResponse ) -> List [Document ]:
330
330
documents = []
331
331
for match in results .matches :
332
+ metadata = match .metadata
333
+ blob = metadata .pop ("blob" , None ) if metadata else None
334
+ meta = metadata .pop ("meta" , {}) if metadata else {}
332
335
document = Document (
333
336
content = match .text ,
334
337
id = match .document_id ,
335
338
embedding = match .values ,
336
- blob = match . metadata . pop ( " blob" , None ) ,
337
- meta = match . metadata . pop ( " meta" , None ) ,
339
+ blob = blob ,
340
+ meta = meta ,
338
341
score = match .score ,
339
342
)
340
343
documents .append (document )
0 commit comments