add graphrag docs #1362

Merged 5 commits on Oct 9, 2024
352 changes: 174 additions & 178 deletions docs/cookbooks/graphrag.mdx

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions docs/cookbooks/walkthrough.mdx
@@ -626,7 +626,7 @@ r2r search --query="Who founded Airbnb?" --use-kg-search --kg-search-type=local
```python
client.search("Who founded Airbnb?", kg_search_settings={
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": 0, # level of community to search
"max_community_description_length": 65536,
"max_llm_queries_for_global_search": 250,
@@ -643,7 +643,7 @@ client.search("Who founded Airbnb?", kg_search_settings={
```javascript
await client.search("Who founded Airbnb?", true, {}, 10, false, {}, {
useKgSearch: true,
kgSearchType: "global",
kgSearchType: "local",
kgSearchLevel: "0",
maxCommunityDescriptionLength: 65536,
maxLlmQueriesForGlobalSearch: 250,
@@ -664,7 +664,7 @@ curl -X POST http://localhost:7272/v2/search \
"query": "Who founded Airbnb?",
"kg_search_settings": {
"use_kg_search": true,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": "0",
"max_community_description_length": 65536,
"max_llm_queries_for_global_search": 250,
@@ -682,7 +682,7 @@
Key configurable parameters for knowledge graph search include:

- `use_kg_search`: Enable knowledge graph search.
- `kg_search_type`: Choose between "global" or "local" search.
- `kg_search_type`: The type of knowledge graph search. Only "local" is supported.
- `kg_search_level`: Specify the level of community to search.
- `entity_types`: List of entity types to include in the search.
- `relationships`: List of relationship types to include in the search.
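For reference, here is a minimal sketch that pulls the settings above into a single local knowledge-graph search call. The server URL is taken from the curl example, and the exact client signature may vary by R2R version:

```python
# Sketch only: mirrors the Python snippet in the hunk above.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

response = client.search(
    "Who founded Airbnb?",
    kg_search_settings={
        "use_kg_search": True,
        "kg_search_type": "local",                 # global search is removed in this PR
        "kg_search_level": 0,                      # level of community to search
        "max_community_description_length": 65536,
        "max_llm_queries_for_global_search": 250,
    },
)
print(response)
```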
2 changes: 1 addition & 1 deletion docs/documentation/configuration/rag.mdx
@@ -42,7 +42,7 @@ vector_search_settings = {
# Configure graphRAG search
kg_search_settings = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": None,
"generation_config": {
"model": "gpt-4",
@@ -7,7 +7,7 @@ Knowledge graph search settings can be configured both server-side and at runtime
```python
kg_search_settings = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": None,
"generation_config": {
"model": "gpt-4",
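Since these settings can also be supplied at runtime, here is a hedged sketch of passing the block above to a RAG call. The keyword arguments follow the R2R Python SDK docs touched elsewhere in this diff; treat them as assumptions if your version differs:

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

kg_search_settings = {
    "use_kg_search": True,
    "kg_search_type": "local",
    "kg_search_level": None,  # leave the community level unspecified
    "generation_config": {"model": "gpt-4"},
}

rag_response = client.rag(
    "Who founded Airbnb?",
    kg_search_settings=kg_search_settings,
)
print(rag_response)
```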
4 changes: 2 additions & 2 deletions docs/documentation/js-sdk/retrieval.mdx
@@ -122,7 +122,7 @@ const searchResponse = await client.search("What was Uber's profit in 2020?");
</ParamField>

<ParamField path="kg_search_type" type="str" default="local">
The type of knowledge graph search to perform. Valid options are "local" or "global".
The type of knowledge graph search to perform. Supported value: "local".
</ParamField>

<ParamField path="kg_search_level" type="Optional[str]" default="None">
@@ -358,7 +358,7 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?");
</ParamField>

<ParamField path="kg_search_type" type="str" default="local">
The type of knowledge graph search to perform. Valid options are "local" or "global".
The type of knowledge graph search to perform. Supported value: "local".
</ParamField>

<ParamField path="kg_search_level" type="Optional[str]" default="None">
6 changes: 3 additions & 3 deletions docs/documentation/python-sdk/retrieval.mdx
@@ -125,7 +125,7 @@ search_response = client.search("What was Uber's profit in 2020?")
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

@@ -389,7 +389,7 @@ rag_response = client.rag("What was Uber's profit in 2020?")
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

@@ -699,7 +699,7 @@ Note that any of the customization seen in AI powered search and RAG documentation
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

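One practical consequence of the default change documented above: callers that enable knowledge-graph search no longer need to set `kg_search_type` at all. A minimal sketch, reusing the query string from the examples above:

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

# kg_search_type now defaults to "local", so it can be omitted entirely.
search_response = client.search(
    "What was Uber's profit in 2020?",
    kg_search_settings={"use_kg_search": True},
)
```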
Binary file added docs/images/kg_extraction_progress.png
Binary file added docs/images/kg_ingestion_status.png
2 changes: 1 addition & 1 deletion js/sdk/src/models.tsx
@@ -57,7 +57,7 @@ export interface KGSearchSettings {
filters?: Record<string, any>;
selected_collection_ids?: string[];
graphrag_map_system_prompt?: string;
kg_search_type?: "global" | "local";
kg_search_type?: "local";
kg_search_level?: number | null;
generation_config?: GenerationConfig;
// entity_types?: any[];
27 changes: 21 additions & 6 deletions py/core/main/api/kg_router.py
@@ -44,13 +44,25 @@ def _load_openapi_extras(self):
return yaml_content

def _register_workflows(self):

workflow_messages = {}
if self.orchestration_provider.config.provider == "hatchet":
workflow_messages["create-graph"] = (
"Graph creation task queued successfully."
)
workflow_messages["enrich-graph"] = (
"Graph enrichment task queued successfully."
)
else:
workflow_messages["create-graph"] = (
"Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG."
)
workflow_messages["enrich-graph"] = "Graph enriched successfully."

self.orchestration_provider.register_workflows(
Workflow.KG,
self.service,
{
"create-graph": "Graph creation task queued successfully.",
"enrich-graph": "Graph enrichment task queued successfully.",
},
workflow_messages,
)

def _setup_routes(self):
@@ -74,7 +86,8 @@ async def create_graph(
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedKGCreationResponse: # type: ignore
"""
Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.
Creating a graph on your documents. This endpoint takes as input a list of document IDs and KGCreationSettings.
If document IDs are not provided, the graph will be created on all documents in the system.
This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.
In order to do GraphRAG, you will need to run the enrich_graph endpoint.
"""
@@ -135,7 +148,9 @@ async def enrich_graph(
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedKGEnrichmentResponse:
"""
This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work.
This endpoint enriches the graph with additional information.
It creates communities of nodes based on their similarity and adds embeddings to the graph.
This step is necessary for GraphRAG to work.
"""
if not auth_user.is_superuser:
logger.warning("Implement permission checks here.")
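As the two docstrings above describe, GraphRAG is a two-step flow: `create-graph` extracts entities and relationships, and `enrich-graph` clusters them into communities so that local search has something to query. A hedged sketch of that flow via the Python client (the method names are assumptions based on the R2R SDK; the CLI equivalents `r2r create-graph` and `r2r enrich-graph` are referenced elsewhere in this diff):

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

# Step 1: extract entities and relationships from the ingested documents
# (pass document IDs to restrict the graph to a subset).
client.create_graph()

# Step 2: build communities and embeddings; required before any
# kg_search_settings={"use_kg_search": True} query returns graph results.
client.enrich_graph()
```

With the "hatchet" orchestrator both steps are queued rather than run inline, which is why the response messages registered above differ by provider.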
113 changes: 3 additions & 110 deletions py/core/pipes/retrieval/kg_search_pipe.py
@@ -209,107 +209,6 @@ async def local_search(
},
)

async def global_search(
self,
input: GeneratorPipe.Input,
state: AsyncState,
run_id: UUID,
kg_search_settings: KGSearchSettings,
*args: Any,
**kwargs: Any,
) -> AsyncGenerator[KGSearchResult, None]:
# map reduce
async for message in input.message:
map_responses = []
communities = await self.kg_provider.get_communities( # type: ignore
level=kg_search_settings.kg_search_level
)

if len(communities) == 0:
raise R2RException(
"No communities found. Please make sure you have run the KG enrichment step before running the search: r2r create-graph and r2r enrich-graph",
400,
)

async def preprocess_communities(communities):
merged_report = ""
for community in communities:
community_report = community.summary
if (
len(merged_report) + len(community_report)
> kg_search_settings.max_community_description_length
):
yield merged_report.strip()
merged_report = ""
merged_report += community_report + "\n\n"
if merged_report:
yield merged_report.strip()

async def process_community(merged_report):
output = await self.llm_provider.aget_completion(
messages=self.prompt_provider._get_message_payload(
task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_map_system_prompt,
task_inputs={
"context_data": merged_report,
"input": message,
},
),
generation_config=kg_search_settings.generation_config,
)

return output.choices[0].message.content

preprocessed_reports = [
merged_report
async for merged_report in preprocess_communities(communities)
]

# Use asyncio.gather to process all preprocessed community reports concurrently
logger.info(
f"Processing {len(communities)} communities, {len(preprocessed_reports)} reports, Max LLM queries = {kg_search_settings.max_llm_queries_for_global_search}"
)

map_responses = await asyncio.gather(
*[
process_community(report)
for report in preprocessed_reports[
: kg_search_settings.max_llm_queries_for_global_search
]
]
)
# Filter only the relevant responses
filtered_responses = self.filter_responses(map_responses)

# reducing the outputs
output = await self.llm_provider.aget_completion(
messages=self.prompt_provider._get_message_payload(
task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_reduce_system_prompt,
task_inputs={
"response_type": "multiple paragraphs",
"report_data": filtered_responses,
"input": message,
},
),
generation_config=kg_search_settings.generation_config,
)

output_text = output.choices[0].message.content

if not output_text:
logger.warning(f"No output generated for query: {message}.")
raise R2RException(
"No output generated for query.",
400,
)

yield KGSearchResult(
content=KGGlobalResult(
name="Global Result", description=output_text
),
method=KGSearchMethod.GLOBAL,
metadata={"associated_query": message},
)

async def _run_logic( # type: ignore
self,
input: GeneratorPipe.Input,
@@ -321,17 +220,11 @@ async def _run_logic(  # type: ignore
) -> AsyncGenerator[KGSearchResult, None]:
kg_search_type = kg_search_settings.kg_search_type

# runs local and/or global search
if kg_search_type == "local" or kg_search_type == "local_and_global":
if kg_search_type == "local":
logger.info("Performing KG local search")
async for result in self.local_search(
input, state, run_id, kg_search_settings
):
yield result

if kg_search_type == "global" or kg_search_type == "local_and_global":
logger.info("Performing KG global search")
async for result in self.global_search(
input, state, run_id, kg_search_settings
):
yield result
else:
raise ValueError(f"Unsupported KG search type: {kg_search_type}")
2 changes: 1 addition & 1 deletion py/core/providers/kg/postgres.py
@@ -996,7 +996,7 @@ async def get_triples(
ORDER BY id
OFFSET ${len(params) + 1} LIMIT ${len(params) + 2}
"""
params.extend([str(offset), str(limit)])
params.extend([offset, limit]) # type: ignore

results = await self.fetch_query(query, params)
total_entries = await self.get_triple_count(
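A plausible reason for this one-line change, assuming `fetch_query` wraps asyncpg-style positional parameters: OFFSET and LIMIT bind as integers, and asyncpg rejects stringified values for integer parameters. A sketch with placeholder table and column names:

```python
import asyncpg


async def fetch_triples_page(conn: asyncpg.Connection, offset: int, limit: int):
    # Placeholder query; the real provider builds this dynamically.
    query = """
        SELECT subject, predicate, object
        FROM triples
        ORDER BY id
        OFFSET $1 LIMIT $2
    """
    # Passing str(offset) / str(limit) here raises asyncpg.DataError
    # ("an integer is required"); plain ints bind cleanly.
    return await conn.fetch(query, offset, limit)
```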
3 changes: 1 addition & 2 deletions py/shared/abstractions/search.py
@@ -61,7 +61,6 @@ class KGSearchResultType(str, Enum):

class KGSearchMethod(str, Enum):
LOCAL = "local"
GLOBAL = "global"


class KGEntityResult(R2RSerializable):
@@ -357,7 +356,7 @@ class Config:
json_encoders = {UUID: str}
json_schema_extra = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": "0",
"generation_config": GenerationConfig.Config.json_schema_extra,
"max_community_description_length": 65536,