add graphrag docs #1362

Merged 5 commits on Oct 9, 2024
352 changes: 174 additions & 178 deletions docs/cookbooks/graphrag.mdx

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions docs/cookbooks/walkthrough.mdx
@@ -626,7 +626,7 @@ r2r search --query="Who founded Airbnb?" --use-kg-search --kg-search-type=local
```python
client.search("Who founded Airbnb?", kg_search_settings={
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": 0, # level of community to search
"max_community_description_length": 65536,
"max_llm_queries_for_global_search": 250,
@@ -643,7 +643,7 @@ client.search("Who founded Airbnb?", kg_search_settings={
```javascript
await client.search("Who founded Airbnb?", true, {}, 10, false, {}, {
useKgSearch: true,
kgSearchType: "global",
kgSearchType: "local",
kgSearchLevel: "0",
maxCommunityDescriptionLength: 65536,
maxLlmQueriesForGlobalSearch: 250,
@@ -664,7 +664,7 @@ curl -X POST http://localhost:7272/v2/search \
"query": "Who founded Airbnb?",
"kg_search_settings": {
"use_kg_search": true,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": "0",
"max_community_description_length": 65536,
"max_llm_queries_for_global_search": 250,
@@ -682,7 +682,7 @@
Key configurable parameters for knowledge graph search include:

- `use_kg_search`: Enable knowledge graph search.
- `kg_search_type`: Choose between "global" or "local" search.
- `kg_search_type`: The type of knowledge graph search. Only "local" is supported.
- `kg_search_level`: Specify the level of community to search.
- `entity_types`: List of entity types to include in the search.
- `relationships`: List of relationship types to include in the search.
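For reference, here is a minimal sketch that pulls the settings above into a single local knowledge-graph search call. The server URL is taken from the curl example, and the exact client signature may vary by R2R version:

```python
# Sketch only: mirrors the Python snippet in the hunk above.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

response = client.search(
    "Who founded Airbnb?",
    kg_search_settings={
        "use_kg_search": True,
        "kg_search_type": "local",                 # global search is removed in this PR
        "kg_search_level": 0,                      # level of community to search
        "max_community_description_length": 65536,
        "max_llm_queries_for_global_search": 250,
    },
)
print(response)
```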
2 changes: 1 addition & 1 deletion docs/documentation/configuration/rag.mdx
@@ -42,7 +42,7 @@ vector_search_settings = {
# Configure graphRAG search
kg_search_settings = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": None,
"generation_config": {
"model": "gpt-4",
@@ -7,7 +7,7 @@ Knowledge graph search settings can be configured both server-side and at runtime
```python
kg_search_settings = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": None,
"generation_config": {
"model": "gpt-4",
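Since these settings can also be supplied at runtime, here is a hedged sketch of passing the block above to a RAG call. The keyword arguments follow the R2R Python SDK docs touched elsewhere in this diff; treat them as assumptions if your version differs:

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

kg_search_settings = {
    "use_kg_search": True,
    "kg_search_type": "local",
    "kg_search_level": None,  # leave the community level unspecified
    "generation_config": {"model": "gpt-4"},
}

rag_response = client.rag(
    "Who founded Airbnb?",
    kg_search_settings=kg_search_settings,
)
print(rag_response)
```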
4 changes: 2 additions & 2 deletions docs/documentation/js-sdk/retrieval.mdx
@@ -122,7 +122,7 @@ const searchResponse = await client.search("What was Uber's profit in 2020?");
</ParamField>

<ParamField path="kg_search_type" type="str" default="local">
The type of knowledge graph search to perform. Valid options are "local" or "global".
The type of knowledge graph search to perform. Supported value: "local".
</ParamField>

<ParamField path="kg_search_level" type="Optional[str]" default="None">
@@ -358,7 +358,7 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?");
</ParamField>

<ParamField path="kg_search_type" type="str" default="local">
The type of knowledge graph search to perform. Valid options are "local" or "global".
The type of knowledge graph search to perform. Supported value: "local".
</ParamField>

<ParamField path="kg_search_level" type="Optional[str]" default="None">
6 changes: 3 additions & 3 deletions docs/documentation/python-sdk/retrieval.mdx
@@ -125,7 +125,7 @@ search_response = client.search("What was Uber's profit in 2020?")
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

@@ -389,7 +389,7 @@ rag_response = client.rag("What was Uber's profit in 2020?")
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

@@ -699,7 +699,7 @@ Note that any of the customization seen in AI powered search and RAG documentation
Whether to use knowledge graph search.
</ParamField>

<ParamField path="kg_search_type" type="str" default="global">
<ParamField path="kg_search_type" type="str" default="local">
Type of knowledge graph search. Can be 'global' or 'local'.
</ParamField>

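One practical consequence of the default change documented above: callers that enable knowledge-graph search no longer need to set `kg_search_type` at all. A minimal sketch, reusing the query string from the examples above:

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

# kg_search_type now defaults to "local", so it can be omitted entirely.
search_response = client.search(
    "What was Uber's profit in 2020?",
    kg_search_settings={"use_kg_search": True},
)
```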
Binary file added docs/images/kg_extraction_progress.png
Binary file added docs/images/kg_ingestion_status.png
2 changes: 1 addition & 1 deletion js/sdk/src/models.tsx
@@ -57,7 +57,7 @@ export interface KGSearchSettings {
filters?: Record<string, any>;
selected_collection_ids?: string[];
graphrag_map_system_prompt?: string;
kg_search_type?: "global" | "local";
kg_search_type?: "local";
kg_search_level?: number | null;
generation_config?: GenerationConfig;
// entity_types?: any[];
27 changes: 21 additions & 6 deletions py/core/main/api/kg_router.py
@@ -44,13 +44,25 @@ def _load_openapi_extras(self):
return yaml_content

def _register_workflows(self):

workflow_messages = {}
if self.orchestration_provider.config.provider == "hatchet":
workflow_messages["create-graph"] = (
"Graph creation task queued successfully."
)
workflow_messages["enrich-graph"] = (
"Graph enrichment task queued successfully."
)
else:
workflow_messages["create-graph"] = (
"Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG."
)
workflow_messages["enrich-graph"] = "Graph enriched successfully."

self.orchestration_provider.register_workflows(
Workflow.KG,
self.service,
{
"create-graph": "Graph creation task queued successfully.",
"enrich-graph": "Graph enrichment task queued successfully.",
},
workflow_messages,
)

def _setup_routes(self):
@@ -74,7 +86,8 @@ async def create_graph(
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedKGCreationResponse: # type: ignore
"""
Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.
Creating a graph on your documents. This endpoint takes as input a list of document IDs and KGCreationSettings.
If document IDs are not provided, the graph will be created on all documents in the system.
This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.
In order to do GraphRAG, you will need to run the enrich_graph endpoint.
"""
@@ -135,7 +148,9 @@ async def enrich_graph(
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedKGEnrichmentResponse:
"""
This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work.
This endpoint enriches the graph with additional information.
It creates communities of nodes based on their similarity and adds embeddings to the graph.
This step is necessary for GraphRAG to work.
"""
if not auth_user.is_superuser:
logger.warning("Implement permission checks here.")
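As the two docstrings above describe, GraphRAG is a two-step flow: `create-graph` extracts entities and relationships, and `enrich-graph` clusters them into communities so that local search has something to query. A hedged sketch of that flow via the Python client (the method names are assumptions based on the R2R SDK; the CLI equivalents `r2r create-graph` and `r2r enrich-graph` are referenced elsewhere in this diff):

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # placeholder URL

# Step 1: extract entities and relationships from the ingested documents
# (pass document IDs to restrict the graph to a subset).
client.create_graph()

# Step 2: build communities and embeddings; required before any
# kg_search_settings={"use_kg_search": True} query returns graph results.
client.enrich_graph()
```

With the "hatchet" orchestrator both steps are queued rather than run inline, which is why the response messages registered above differ by provider.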
113 changes: 3 additions & 110 deletions py/core/pipes/retrieval/kg_search_pipe.py
@@ -209,107 +209,6 @@ async def local_search(
},
)

async def global_search(
self,
input: GeneratorPipe.Input,
state: AsyncState,
run_id: UUID,
kg_search_settings: KGSearchSettings,
*args: Any,
**kwargs: Any,
) -> AsyncGenerator[KGSearchResult, None]:
# map reduce
async for message in input.message:
map_responses = []
communities = await self.kg_provider.get_communities( # type: ignore
level=kg_search_settings.kg_search_level
)

if len(communities) == 0:
raise R2RException(
"No communities found. Please make sure you have run the KG enrichment step before running the search: r2r create-graph and r2r enrich-graph",
400,
)

async def preprocess_communities(communities):
merged_report = ""
for community in communities:
community_report = community.summary
if (
len(merged_report) + len(community_report)
> kg_search_settings.max_community_description_length
):
yield merged_report.strip()
merged_report = ""
merged_report += community_report + "\n\n"
if merged_report:
yield merged_report.strip()

async def process_community(merged_report):
output = await self.llm_provider.aget_completion(
messages=self.prompt_provider._get_message_payload(
task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_map_system_prompt,
task_inputs={
"context_data": merged_report,
"input": message,
},
),
generation_config=kg_search_settings.generation_config,
)

return output.choices[0].message.content

preprocessed_reports = [
merged_report
async for merged_report in preprocess_communities(communities)
]

# Use asyncio.gather to process all preprocessed community reports concurrently
logger.info(
f"Processing {len(communities)} communities, {len(preprocessed_reports)} reports, Max LLM queries = {kg_search_settings.max_llm_queries_for_global_search}"
)

map_responses = await asyncio.gather(
*[
process_community(report)
for report in preprocessed_reports[
: kg_search_settings.max_llm_queries_for_global_search
]
]
)
# Filter only the relevant responses
filtered_responses = self.filter_responses(map_responses)

# reducing the outputs
output = await self.llm_provider.aget_completion(
messages=self.prompt_provider._get_message_payload(
task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_reduce_system_prompt,
task_inputs={
"response_type": "multiple paragraphs",
"report_data": filtered_responses,
"input": message,
},
),
generation_config=kg_search_settings.generation_config,
)

output_text = output.choices[0].message.content

if not output_text:
logger.warning(f"No output generated for query: {message}.")
raise R2RException(
"No output generated for query.",
400,
)

yield KGSearchResult(
content=KGGlobalResult(
name="Global Result", description=output_text
),
method=KGSearchMethod.GLOBAL,
metadata={"associated_query": message},
)

async def _run_logic( # type: ignore
self,
input: GeneratorPipe.Input,
@@ -321,17 +220,11 @@ async def _run_logic(  # type: ignore
) -> AsyncGenerator[KGSearchResult, None]:
kg_search_type = kg_search_settings.kg_search_type

# runs local and/or global search
if kg_search_type == "local" or kg_search_type == "local_and_global":
if kg_search_type == "local":
logger.info("Performing KG local search")
async for result in self.local_search(
input, state, run_id, kg_search_settings
):
yield result

if kg_search_type == "global" or kg_search_type == "local_and_global":
logger.info("Performing KG global search")
async for result in self.global_search(
input, state, run_id, kg_search_settings
):
yield result
else:
raise ValueError(f"Unsupported KG search type: {kg_search_type}")
2 changes: 1 addition & 1 deletion py/core/providers/kg/postgres.py
@@ -996,7 +996,7 @@ async def get_triples(
ORDER BY id
OFFSET ${len(params) + 1} LIMIT ${len(params) + 2}
"""
params.extend([str(offset), str(limit)])
params.extend([offset, limit]) # type: ignore

results = await self.fetch_query(query, params)
total_entries = await self.get_triple_count(
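A plausible reason for this one-line change, assuming `fetch_query` wraps asyncpg-style positional parameters: OFFSET and LIMIT bind as integers, and asyncpg rejects stringified values for integer parameters. A sketch with placeholder table and column names:

```python
import asyncpg


async def fetch_triples_page(conn: asyncpg.Connection, offset: int, limit: int):
    # Placeholder query; the real provider builds this dynamically.
    query = """
        SELECT subject, predicate, object
        FROM triples
        ORDER BY id
        OFFSET $1 LIMIT $2
    """
    # Passing str(offset) / str(limit) here raises asyncpg.DataError
    # ("an integer is required"); plain ints bind cleanly.
    return await conn.fetch(query, offset, limit)
```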
3 changes: 1 addition & 2 deletions py/shared/abstractions/search.py
@@ -61,7 +61,6 @@ class KGSearchResultType(str, Enum):

class KGSearchMethod(str, Enum):
LOCAL = "local"
GLOBAL = "global"


class KGEntityResult(R2RSerializable):
@@ -357,7 +356,7 @@ class Config:
json_encoders = {UUID: str}
json_schema_extra = {
"use_kg_search": True,
"kg_search_type": "global",
"kg_search_type": "local",
"kg_search_level": "0",
"generation_config": GenerationConfig.Config.json_schema_extra,
"max_community_description_length": 65536,