@@ -93,30 +93,27 @@ def search_across_all_fields(query: str, all_configs: dict, threshold: float = 0
9393
def global_fuzzy_keyword_search(keywords: Iterable[str], top_k: int = 20,
                                threshold: float = 0.8) -> List[dict]:
    """
    For each keyword, run search_across_all_fields across all datasources_config
    and combine unique hits.

    Args:
        keywords: Keywords to search for; falsy entries are skipped. A falsy
            value for the whole argument (e.g. None) yields [].
        top_k: Maximum number of results to return.
        threshold: Fuzzy-match threshold forwarded to search_across_all_fields
            (defaults to the previously hard-coded 0.8).

    Returns:
        Up to top_k result dicts, de-duplicated by their "_id"/"id" field.
        Returns [] when the config file is missing or unreadable.
    """
    config_path = "datasources_config.json"
    if not os.path.exists(config_path):
        return []
    try:
        with open(config_path, "r", encoding="utf-8") as fh:
            all_configs = json.load(fh)
    except (OSError, json.JSONDecodeError) as e:
        # Fail soft like the previous implementation did, instead of letting a
        # corrupt or unreadable config file propagate an exception to callers.
        print(f" -> Error in global_fuzzy_keyword_search: {e}")
        return []
    out: List[dict] = []
    seen = set()
    for kw in keywords or []:
        if not kw:
            continue
        results = search_across_all_fields(kw, all_configs, threshold=threshold)
        for r in results:
            rid = r.get("_id") or r.get("id")
            # NOTE(review): results with neither "_id" nor "id" are silently
            # dropped here (cannot be de-duplicated) — confirm this is intended.
            if rid and rid not in seen:
                seen.add(rid)
                out.append(r)
        # Stop early once enough unique hits have been gathered.
        if len(out) >= top_k:
            break
    return out[:top_k]
120117
121118
122119def extract_datasource_info_from_link (link : str ) -> tuple :
0 commit comments