6
6
from typing import Any , Dict , List , Optional , Type , Union
7
7
8
8
from azure .core .credentials import AzureKeyCredential
9
- from azure .core .exceptions import ClientAuthenticationError , HttpResponseError , ResourceNotFoundError
9
+ from azure .core .exceptions import (
10
+ ClientAuthenticationError ,
11
+ HttpResponseError ,
12
+ ResourceNotFoundError ,
13
+ )
10
14
from azure .core .pipeline .policies import UserAgentPolicy
11
15
from azure .identity import DefaultAzureCredential
12
16
from azure .search .documents import SearchClient
67
71
68
72
DEFAULT_VECTOR_SEARCH = VectorSearch (
69
73
profiles = [
70
- VectorSearchProfile (name = "default-vector-config" , algorithm_configuration_name = "cosine-algorithm-config" )
74
+ VectorSearchProfile (
75
+ name = "default-vector-config" ,
76
+ algorithm_configuration_name = "cosine-algorithm-config" ,
77
+ )
71
78
],
72
79
algorithms = [
73
80
HnswAlgorithmConfiguration (
@@ -94,6 +101,7 @@ def __init__(
94
101
embedding_dimension : int = 768 ,
95
102
metadata_fields : Optional [Dict [str , Union [SearchField , type ]]] = None ,
96
103
vector_search_configuration : Optional [VectorSearch ] = None ,
104
+ include_search_metadata : bool = False ,
97
105
** index_creation_kwargs : Any ,
98
106
):
99
107
"""
@@ -123,6 +131,10 @@ def __init__(
123
131
:param vector_search_configuration: Configuration option related to vector search.
124
132
Default configuration uses the HNSW algorithm with cosine similarity to handle vector searches.
125
133
134
+ :param include_search_metadata: Whether to include Azure AI Search metadata fields
135
+ in the returned documents. When set to True, the `meta` field of the returned
136
+ documents will contain the @search.score, @search.reranker_score, @search.highlights,
137
+ @search.captions, and other fields returned by Azure AI Search.
126
138
:param index_creation_kwargs: Optional keyword parameters to be passed to `SearchIndex` class
127
139
during index creation. Some of the supported parameters:
128
140
- `semantic_search`: Defines semantic configuration of the search index. This parameter is needed
@@ -143,6 +155,7 @@ def __init__(
143
155
self ._dummy_vector = [- 10.0 ] * self ._embedding_dimension
144
156
self ._metadata_fields = self ._normalize_metadata_index_fields (metadata_fields )
145
157
self ._vector_search_configuration = vector_search_configuration or DEFAULT_VECTOR_SEARCH
158
+ self ._include_search_metadata = include_search_metadata
146
159
self ._index_creation_kwargs = index_creation_kwargs
147
160
148
161
@property
@@ -256,7 +269,9 @@ def _create_index(self) -> None:
256
269
self ._index_client .create_index (index )
257
270
258
271
@staticmethod
259
- def _serialize_index_creation_kwargs (index_creation_kwargs : Dict [str , Any ]) -> Dict [str , Any ]:
272
+ def _serialize_index_creation_kwargs (
273
+ index_creation_kwargs : Dict [str , Any ],
274
+ ) -> Dict [str , Any ]:
260
275
"""
261
276
Serializes the index creation kwargs to a dictionary.
262
277
This is needed to handle serialization of Azure AI Search classes
@@ -300,7 +315,7 @@ def to_dict(self) -> Dict[str, Any]:
300
315
"""
301
316
return default_to_dict (
302
317
self ,
303
- azure_endpoint = self ._azure_endpoint .to_dict () if self ._azure_endpoint else None ,
318
+ azure_endpoint = ( self ._azure_endpoint .to_dict () if self ._azure_endpoint else None ) ,
304
319
api_key = self ._api_key .to_dict () if self ._api_key else None ,
305
320
index_name = self ._index_name ,
306
321
embedding_dimension = self ._embedding_dimension ,
@@ -423,19 +438,28 @@ def _convert_search_result_to_documents(self, azure_docs: List[Dict[str, Any]])
423
438
424
439
for azure_doc in azure_docs :
425
440
embedding = azure_doc .get ("embedding" )
441
+ score = azure_doc .get ("@search.score" , None )
426
442
if embedding == self ._dummy_vector :
427
443
embedding = None
444
+ meta = {}
428
445
429
446
# Anything besides default fields (id, content, and embedding) is considered metadata
430
- meta = {
431
- key : value
432
- for key , value in azure_doc .items ()
433
- if key not in ["id" , "content" , "embedding" ] and key in self ._index_fields and value is not None
434
- }
447
+ if self ._include_search_metadata :
448
+ meta = {key : value for key , value in azure_doc .items () if key not in ["id" , "content" , "embedding" ]}
449
+ else :
450
+ meta = {
451
+ key : value
452
+ for key , value in azure_doc .items ()
453
+ if key not in ["id" , "content" , "embedding" ] and key in self ._index_fields and value is not None
454
+ }
435
455
436
456
# Create the document; meta is always passed as a dict (empty when no metadata fields are present)
437
457
doc = Document (
438
- id = azure_doc ["id" ], content = azure_doc ["content" ], embedding = embedding , meta = meta if meta else {}
458
+ id = azure_doc ["id" ],
459
+ content = azure_doc ["content" ],
460
+ embedding = embedding ,
461
+ meta = meta ,
462
+ score = score ,
439
463
)
440
464
441
465
documents .append (doc )
0 commit comments