Skip to content

Commit 7304f3b

Browse files
author
Zohaib
committed
fix: address PR INCF#93 review feedback - move RRF import to top, remove ID overwrite, restore local fuzzy search
1 parent b8d6a40 commit 7304f3b

2 files changed

Lines changed: 20 additions & 24 deletions

File tree

backend/agents.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
from ks_search_tool import general_search, general_search_async, global_fuzzy_keyword_search
1212
from retrieval import get_retriever
13+
from rrf import reciprocal_rank_fusion
1314

1415
# LLM (Gemini) client setup
1516
try:
@@ -425,8 +426,6 @@ async def execute_search(state: AgentState) -> Dict[str, Any]:
425426
return {"ks_results": all_ks_results, "vector_results": vec_results}
426427

427428

428-
from rrf import reciprocal_rank_fusion
429-
430429
def fuse_results(state: AgentState) -> AgentState:
431430
print("--- Node: Result Fusion (RRF) ---")
432431
ks_results = state.get("ks_results", [])

backend/ks_search_tool.py

Lines changed: 19 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -93,30 +93,27 @@ def search_across_all_fields(query: str, all_configs: dict, threshold: float = 0
9393

9494
def global_fuzzy_keyword_search(keywords: Iterable[str], top_k: int = 20) -> List[dict]:
9595
"""
96-
Keyword search utilizing the public API with explicitly extracted keywords.
96+
For each keyword, run search_across_all_fields across all datasources_config and combine unique hits.
9797
"""
98-
if not keywords:
99-
return []
100-
101-
query_str = " OR ".join([f'"{kw}"' if ' ' in kw else kw for kw in keywords if kw])
102-
if not query_str:
103-
return []
104-
105-
try:
106-
# Search via public API using the combined keyword OR-query string
107-
res = general_search(query_str, top_k=top_k, enrich_details=True)
108-
out = res.get("combined_results", [])
109-
110-
# Differentiate IDs for RRF matching, though RRF
111-
# also naturally merges duplicate links/IDs.
112-
for i, item in enumerate(out):
113-
item["_id"] = f"fuzzy_{i}"
114-
item["id"] = f"fuzzy_{i}"
115-
116-
return out
117-
except Exception as e:
118-
print(f" -> Error in global_fuzzy_keyword_search: {e}")
98+
config_path = "datasources_config.json"
99+
if not os.path.exists(config_path):
119100
return []
101+
with open(config_path, "r", encoding="utf-8") as fh:
102+
all_configs = json.load(fh)
103+
out: List[dict] = []
104+
seen = set()
105+
for kw in keywords or []:
106+
if not kw:
107+
continue
108+
results = search_across_all_fields(kw, all_configs, threshold=0.8)
109+
for r in results:
110+
rid = r.get("_id") or r.get("id")
111+
if rid and rid not in seen:
112+
seen.add(rid)
113+
out.append(r)
114+
if len(out) >= top_k:
115+
break
116+
return out[:top_k]
120117

121118

122119
def extract_datasource_info_from_link(link: str) -> tuple:

0 commit comments

Comments
 (0)