Skip to content

Commit

Permalink
fix integration suite
Browse files Browse the repository at this point in the history
  • Loading branch information
emrgnt-cmplxty committed Oct 4, 2024
1 parent d11ed32 commit 453623d
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 40 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/integration-test-workflow-debian.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ jobs:
- name: Run integration tests - Ingestion
working-directory: ./py
run: |
poetry run python tests/integration/runner.py test_ingest_sample_files_cli
poetry run python tests/integration/runner.py test_document_ingestion_cli
poetry run python tests/integration/runner.py test_ingest_sample_file_cli
poetry run python tests/integration/runner.py test_document_overview_sample_file_cli
poetry run python tests/integration/runner.py test_vector_search_sample_file_filter_cli
- name: Stop R2R server
if: always()
Expand Down
2 changes: 1 addition & 1 deletion py/core/base/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@
RAGAgentResponse,
RAGResponse,
SearchResponse,
WrappedCompletionResponse,
WrappedRAGAgentResponse,
WrappedRAGResponse,
WrappedSearchResponse,
WrappedCompletionResponse,
)

__all__ = [
Expand Down
4 changes: 2 additions & 2 deletions py/core/base/providers/kg.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def get_entities(
limit: int,
entity_ids: list[str] | None = None,
with_description: bool = False,
) -> list[Entity]:
) -> dict:
"""Abstract method to get entities."""
pass

Expand All @@ -100,7 +100,7 @@ async def get_triples(
offset: int,
limit: int,
triple_ids: list[str] | None = None,
) -> list[Triple]:
) -> dict:
"""Abstract method to get triples."""
pass

Expand Down
19 changes: 10 additions & 9 deletions py/core/main/api/ingestion_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from io import BytesIO
from pathlib import Path
from typing import Optional
from typing import Optional, Union
from uuid import UUID

import yaml
Expand Down Expand Up @@ -118,7 +118,7 @@ async def ingest_files_app(

file_datas = await self._process_files(files)

messages = []
messages: list[dict[str, Union[str, None]]] = []
for it, file_data in enumerate(file_datas):
content_length = len(file_data["content"])
file_content = BytesIO(base64.b64decode(file_data["content"]))
Expand Down Expand Up @@ -149,7 +149,7 @@ async def ingest_files_app(
file_content,
file_data["content_type"],
)
raw_message = await self.orchestration_provider.run_workflow(
raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore
"ingest-files",
{"request": workflow_input},
options={
Expand All @@ -159,9 +159,10 @@ async def ingest_files_app(
},
)
raw_message["document_id"] = str(document_id)
if "task_id" not in raw_message:
raw_message["task_id"] = None
messages.append(raw_message)

return messages
return messages # type: ignore

update_files_extras = self.openapi_extras.get("update_files", {})
update_files_descriptions = update_files_extras.get(
Expand All @@ -188,7 +189,7 @@ async def update_files_app(
description=ingest_files_descriptions.get("ingestion_config"),
),
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedUpdateResponse: # type: ignore
) -> WrappedUpdateResponse:
"""
Update existing files in the system.
Expand Down Expand Up @@ -255,7 +256,7 @@ async def update_files_app(
)
raw_message["message"] = "Update task queued successfully."
raw_message["document_ids"] = workflow_input["document_ids"]
return raw_message
return raw_message # type: ignore

ingest_chunks_extras = self.openapi_extras.get("ingest_chunks", {})
ingest_chunks_descriptions = ingest_chunks_extras.get(
Expand All @@ -278,7 +279,7 @@ async def ingest_chunks_app(
None, description=ingest_files_descriptions.get("metadata")
),
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedIngestionResponse: # type: ignore
) -> WrappedIngestionResponse:
"""
Ingest text chunks into the system.
Expand Down Expand Up @@ -308,7 +309,7 @@ async def ingest_chunks_app(
},
)
raw_message["document_id"] = str(document_id)
return raw_message
return raw_message # type: ignore

@staticmethod
def _validate_ingestion_config(ingestion_config):
Expand Down
8 changes: 2 additions & 6 deletions py/core/providers/kg/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
KGProvider,
Triple,
)
from shared.abstractions import (
KGCreationSettings,
KGEnrichmentSettings,
KGRunType,
)
from shared.abstractions import KGCreationSettings, KGEnrichmentSettings
from shared.api.models.kg.responses import (
KGCreationEstimationResponse,
KGEnrichmentEstimationResponse,
Expand Down Expand Up @@ -923,7 +919,7 @@ async def get_entities(
with_description: bool = False,
) -> dict:
conditions = []
params = [collection_id]
params: list = [collection_id]

if entity_ids:
conditions.append(f"id = ANY(${len(params) + 1})")
Expand Down
4 changes: 2 additions & 2 deletions py/shared/api/models/ingestion/responses.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TypeVar
from typing import Optional, TypeVar
from uuid import UUID

from pydantic import BaseModel, Field
Expand All @@ -13,7 +13,7 @@ class IngestionResponse(BaseModel):
...,
description="A message describing the result of the ingestion request.",
)
task_id: UUID = Field(
task_id: Optional[UUID] = Field(
...,
description="The task ID of the ingestion request.",
)
Expand Down
77 changes: 59 additions & 18 deletions py/tests/integration/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,46 +16,87 @@ def run_command(command):
return result.stdout


def test_ingest_sample_files_cli():
print("Testing: Ingest sample files")
run_command("poetry run r2r ingest-sample-files")
def test_ingest_sample_file_cli():
print("Testing: Ingest sample file CLI")
run_command("poetry run r2r ingest-sample-file")
print("Ingestion successful")


def test_document_ingestion_cli():
print("Testing: Document ingestion")
def test_document_overview_sample_file_cli():
print("Testing: Document overview contains 'aristotle.txt'")
output = run_command("poetry run r2r documents-overview")
documents = json.loads(output)
output = output.replace("'", '"')
output_lines = output.strip().split('\n')[1:]
documents = [json.loads(ele) for ele in output_lines]

expected_document = {
"id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
aristotle_document = {
"title": "aristotle.txt",
"type": "txt",
"ingestion_status": "success",
"kg_extraction_status": "success",
"kg_extraction_status": "pending",
"version": "v0",
"metadata": {"title": "aristotle.txt", "version": "v0"},
}

# Check if any document in the overview matches the Aristotle document
if not any(
all(doc.get(k) == v for k, v in expected_document.items())
all(doc.get(k) == v for k, v in aristotle_document.items())
for doc in documents
):
print("Document ingestion test failed")
print(f"Expected document not found in output: {output}")
print("Document overview test failed")
print("Aristotle document not found in the overview")
sys.exit(1)
print("Document ingestion test passed")
print("Document overview test passed")


def test_vector_search_cli():
def test_vector_search_sample_file_filter_cli():
print("Testing: Vector search")
output = run_command(
"poetry run r2r search --query='What was Uber's profit in 2020?'"
"""poetry run r2r search --query="Who was aristotle?" """
)
results = json.loads(output)
if not results.get("results"):
# Split the output into lines and remove the first and last lines
output_lines = output.strip().split('\n')[1:-1]
# Replace single quotes with double quotes in each line
cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
results = []
for line in cleaned_output_lines:
try:
result = json.loads(line)
results.append(result)
# Skip lines that are not valid JSON b/c of the single quote replacement
except json.JSONDecodeError:
continue

if not results:
print("Vector search test failed: No results returned")
sys.exit(1)

expected_lead_search_result = {
"extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
"document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
"user_id": "2acb499e-8428-543b-bd85-0d9098718220",
"score": 0.7820796370506287,
"text": """Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science."""
}
lead_result = results[0]

if lead_result['text'] != expected_lead_search_result['text']:
print('Vector search test failed: Incorrect search result text')
print('Expected lead search text:', expected_lead_search_result['text'])
print('Actual lead search text:', lead_result['text'])
sys.exit(1)

if lead_result['extraction_id'] != expected_lead_search_result['extraction_id']:
print("Vector search test failed: Incorrect extraction_id")
print('Expected extraction_id:', expected_lead_search_result['extraction_id'])
print('Actual extraction_id:', lead_result['extraction_id'])
sys.exit(1)

if lead_result['document_id'] != expected_lead_search_result['document_id']:
print("Vector search test failed: Incorrect document_id")
print('Expected document_id:', expected_lead_search_result['document_id'])
print('Actual document_id:', lead_result['document_id'])
sys.exit(1)

print("Vector search test passed")


Expand Down

0 comments on commit 453623d

Please sign in to comment.