Skip to content

Commit 62f8b58

Browse files
Add query rewriting option (#2437)
Upgrade the Search SDK and add support for query rewriting, enabled by setting AZURE_SEARCH_QUERY_REWRITING. Co-authored-by: Pamela Fox <[email protected]>
1 parent b6f9b76 commit 62f8b58

File tree

92 files changed

+197
-12
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

92 files changed

+197
-12
lines changed

.azdo/pipelines/azure-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ steps:
5959
AZURE_SEARCH_QUERY_LANGUAGE: $(AZURE_SEARCH_QUERY_LANGUAGE)
6060
AZURE_SEARCH_QUERY_SPELLER: $(AZURE_SEARCH_QUERY_SPELLER)
6161
AZURE_SEARCH_SEMANTIC_RANKER: $(AZURE_SEARCH_SEMANTIC_RANKER)
62+
AZURE_SEARCH_QUERY_REWRITING: $(AZURE_SEARCH_QUERY_REWRITING)
6263
AZURE_STORAGE_ACCOUNT: $(AZURE_STORAGE_ACCOUNT)
6364
AZURE_STORAGE_RESOURCE_GROUP: $(AZURE_STORAGE_RESOURCE_GROUP)
6465
AZURE_STORAGE_SKU: $(AZURE_STORAGE_SKU)

.github/workflows/azure-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ jobs:
4949
AZURE_SEARCH_QUERY_LANGUAGE: ${{ vars.AZURE_SEARCH_QUERY_LANGUAGE }}
5050
AZURE_SEARCH_QUERY_SPELLER: ${{ vars.AZURE_SEARCH_QUERY_SPELLER }}
5151
AZURE_SEARCH_SEMANTIC_RANKER: ${{ vars.AZURE_SEARCH_SEMANTIC_RANKER }}
52+
AZURE_SEARCH_QUERY_REWRITING: ${{ vars.AZURE_SEARCH_QUERY_REWRITING }}
5253
AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
5354
AZURE_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_STORAGE_RESOURCE_GROUP }}
5455
AZURE_STORAGE_SKU: ${{ vars.AZURE_STORAGE_SKU }}

app/backend/app.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
CONFIG_INGESTER,
7070
CONFIG_LANGUAGE_PICKER_ENABLED,
7171
CONFIG_OPENAI_CLIENT,
72+
CONFIG_QUERY_REWRITING_ENABLED,
7273
CONFIG_SEARCH_CLIENT,
7374
CONFIG_SEMANTIC_RANKER_DEPLOYED,
7475
CONFIG_SPEECH_INPUT_ENABLED,
@@ -291,6 +292,7 @@ def config():
291292
{
292293
"showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
293294
"showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
295+
"showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED],
294296
"showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
295297
"showUserUpload": current_app.config[CONFIG_USER_UPLOAD_ENABLED],
296298
"showLanguagePicker": current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED],
@@ -453,6 +455,7 @@ async def setup_clients():
453455
AZURE_SEARCH_QUERY_LANGUAGE = os.getenv("AZURE_SEARCH_QUERY_LANGUAGE") or "en-us"
454456
AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER") or "lexicon"
455457
AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()
458+
AZURE_SEARCH_QUERY_REWRITING = os.getenv("AZURE_SEARCH_QUERY_REWRITING", "false").lower()
456459

457460
AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
458461
AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
@@ -634,6 +637,9 @@ async def setup_clients():
634637

635638
current_app.config[CONFIG_GPT4V_DEPLOYED] = bool(USE_GPT4V)
636639
current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
640+
current_app.config[CONFIG_QUERY_REWRITING_ENABLED] = (
641+
AZURE_SEARCH_QUERY_REWRITING == "true" and AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
642+
)
637643
current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
638644
current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
639645
current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER

app/backend/approaches/approach.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ async def search(
149149
use_semantic_captions: bool,
150150
minimum_search_score: Optional[float],
151151
minimum_reranker_score: Optional[float],
152+
use_query_rewriting: Optional[bool] = None,
152153
) -> List[Document]:
153154
search_text = query_text if use_text_search else ""
154155
search_vectors = vectors if use_vector_search else []
@@ -158,6 +159,7 @@ async def search(
158159
filter=filter,
159160
top=top,
160161
query_caption="extractive|highlight-false" if use_semantic_captions else None,
162+
query_rewrites="generative" if use_query_rewriting else None,
161163
vector_queries=search_vectors,
162164
query_type=QueryType.SEMANTIC,
163165
query_language=self.query_language,

app/backend/approaches/chatreadretrieveread.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ async def run_until_final_call(
8989
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
9090
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
9191
use_semantic_captions = True if overrides.get("semantic_captions") else False
92+
use_query_rewriting = True if overrides.get("query_rewriting") else False
9293
top = overrides.get("top", 3)
9394
minimum_search_score = overrides.get("minimum_search_score", 0.0)
9495
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
@@ -147,6 +148,7 @@ async def run_until_final_call(
147148
use_semantic_captions,
148149
minimum_search_score,
149150
minimum_reranker_score,
151+
use_query_rewriting,
150152
)
151153

152154
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
@@ -190,6 +192,7 @@ async def run_until_final_call(
190192
{
191193
"use_semantic_captions": use_semantic_captions,
192194
"use_semantic_ranker": use_semantic_ranker,
195+
"use_query_rewriting": use_query_rewriting,
193196
"top": top,
194197
"filter": filter,
195198
"use_vector_search": use_vector_search,

app/backend/approaches/chatreadretrievereadvision.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ async def run_until_final_call(
8181
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
8282
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
8383
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
84+
use_query_rewriting = True if overrides.get("query_rewriting") else False
8485
use_semantic_captions = True if overrides.get("semantic_captions") else False
8586
top = overrides.get("top", 3)
8687
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -151,6 +152,7 @@ async def run_until_final_call(
151152
use_semantic_captions,
152153
minimum_search_score,
153154
minimum_reranker_score,
155+
use_query_rewriting,
154156
)
155157

156158
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
@@ -207,6 +209,7 @@ async def run_until_final_call(
207209
{
208210
"use_semantic_captions": use_semantic_captions,
209211
"use_semantic_ranker": use_semantic_ranker,
212+
"use_query_rewriting": use_query_rewriting,
210213
"top": top,
211214
"filter": filter,
212215
"vector_fields": vector_fields,

app/backend/approaches/retrievethenread.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ async def run(
6767
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
6868
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
6969
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
70+
use_query_rewriting = True if overrides.get("query_rewriting") else False
7071
use_semantic_captions = True if overrides.get("semantic_captions") else False
7172
top = overrides.get("top", 3)
7273
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -89,6 +90,7 @@ async def run(
8990
use_semantic_captions,
9091
minimum_search_score,
9192
minimum_reranker_score,
93+
use_query_rewriting,
9294
)
9395

9496
# Process results
@@ -118,6 +120,7 @@ async def run(
118120
{
119121
"use_semantic_captions": use_semantic_captions,
120122
"use_semantic_ranker": use_semantic_ranker,
123+
"use_query_rewriting": use_query_rewriting,
121124
"top": top,
122125
"filter": filter,
123126
"use_vector_search": use_vector_search,

app/backend/approaches/retrievethenreadvision.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ async def run(
7676
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
7777
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
7878
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
79+
use_query_rewriting = True if overrides.get("query_rewriting") else False
7980
use_semantic_captions = True if overrides.get("semantic_captions") else False
8081
top = overrides.get("top", 3)
8182
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -108,6 +109,7 @@ async def run(
108109
use_semantic_captions,
109110
minimum_search_score,
110111
minimum_reranker_score,
112+
use_query_rewriting,
111113
)
112114

113115
# Process results
@@ -145,6 +147,7 @@ async def run(
145147
{
146148
"use_semantic_captions": use_semantic_captions,
147149
"use_semantic_ranker": use_semantic_ranker,
150+
"use_query_rewriting": use_query_rewriting,
148151
"top": top,
149152
"filter": filter,
150153
"vector_fields": vector_fields,

app/backend/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
CONFIG_AUTH_CLIENT = "auth_client"
1111
CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
1212
CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed"
13+
CONFIG_QUERY_REWRITING_ENABLED = "query_rewriting_enabled"
1314
CONFIG_VECTOR_SEARCH_ENABLED = "vector_search_enabled"
1415
CONFIG_SEARCH_CLIENT = "search_client"
1516
CONFIG_OPENAI_CLIENT = "openai_client"

app/backend/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ tenacity
77
azure-ai-documentintelligence==1.0.0b4
88
azure-cognitiveservices-speech
99
azure-cosmos
10-
azure-search-documents==11.6.0b6
10+
azure-search-documents==11.6.0b9
1111
azure-storage-blob
1212
azure-storage-file-datalake
1313
uvicorn

0 commit comments

Comments
 (0)