Skip to content

Commit 453623d

Browse files
fix integration suite
1 parent d11ed32 commit 453623d

File tree

7 files changed

+79
-40
lines changed

7 files changed

+79
-40
lines changed

.github/workflows/integration-test-workflow-debian.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,9 @@ jobs:
7878
- name: Run integration tests - Ingestion
7979
working-directory: ./py
8080
run: |
81-
poetry run python tests/integration/runner.py test_ingest_sample_files_cli
82-
poetry run python tests/integration/runner.py test_document_ingestion_cli
81+
poetry run python tests/integration/runner.py test_ingest_sample_file_cli
82+
poetry run python tests/integration/runner.py test_document_overview_sample_file_cli
83+
poetry run python tests/integration/runner.py test_vector_search_sample_file_filter_cli
8384
8485
- name: Stop R2R server
8586
if: always()

py/core/base/api/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@
5353
RAGAgentResponse,
5454
RAGResponse,
5555
SearchResponse,
56+
WrappedCompletionResponse,
5657
WrappedRAGAgentResponse,
5758
WrappedRAGResponse,
5859
WrappedSearchResponse,
59-
WrappedCompletionResponse,
6060
)
6161

6262
__all__ = [

py/core/base/providers/kg.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ async def get_entities(
8989
limit: int,
9090
entity_ids: list[str] | None = None,
9191
with_description: bool = False,
92-
) -> list[Entity]:
92+
) -> dict:
9393
"""Abstract method to get entities."""
9494
pass
9595

@@ -100,7 +100,7 @@ async def get_triples(
100100
offset: int,
101101
limit: int,
102102
triple_ids: list[str] | None = None,
103-
) -> list[Triple]:
103+
) -> dict:
104104
"""Abstract method to get triples."""
105105
pass
106106

py/core/main/api/ingestion_router.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from io import BytesIO
44
from pathlib import Path
5-
from typing import Optional
5+
from typing import Optional, Union
66
from uuid import UUID
77

88
import yaml
@@ -118,7 +118,7 @@ async def ingest_files_app(
118118

119119
file_datas = await self._process_files(files)
120120

121-
messages = []
121+
messages: list[dict[str, Union[str, None]]] = []
122122
for it, file_data in enumerate(file_datas):
123123
content_length = len(file_data["content"])
124124
file_content = BytesIO(base64.b64decode(file_data["content"]))
@@ -149,7 +149,7 @@ async def ingest_files_app(
149149
file_content,
150150
file_data["content_type"],
151151
)
152-
raw_message = await self.orchestration_provider.run_workflow(
152+
raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore
153153
"ingest-files",
154154
{"request": workflow_input},
155155
options={
@@ -159,9 +159,10 @@ async def ingest_files_app(
159159
},
160160
)
161161
raw_message["document_id"] = str(document_id)
162+
if "task_id" not in raw_message:
163+
raw_message["task_id"] = None
162164
messages.append(raw_message)
163-
164-
return messages
165+
return messages # type: ignore
165166

166167
update_files_extras = self.openapi_extras.get("update_files", {})
167168
update_files_descriptions = update_files_extras.get(
@@ -188,7 +189,7 @@ async def update_files_app(
188189
description=ingest_files_descriptions.get("ingestion_config"),
189190
),
190191
auth_user=Depends(self.service.providers.auth.auth_wrapper),
191-
) -> WrappedUpdateResponse: # type: ignore
192+
) -> WrappedUpdateResponse:
192193
"""
193194
Update existing files in the system.
194195
@@ -255,7 +256,7 @@ async def update_files_app(
255256
)
256257
raw_message["message"] = "Update task queued successfully."
257258
raw_message["document_ids"] = workflow_input["document_ids"]
258-
return raw_message
259+
return raw_message # type: ignore
259260

260261
ingest_chunks_extras = self.openapi_extras.get("ingest_chunks", {})
261262
ingest_chunks_descriptions = ingest_chunks_extras.get(
@@ -278,7 +279,7 @@ async def ingest_chunks_app(
278279
None, description=ingest_files_descriptions.get("metadata")
279280
),
280281
auth_user=Depends(self.service.providers.auth.auth_wrapper),
281-
) -> WrappedIngestionResponse: # type: ignore
282+
) -> WrappedIngestionResponse:
282283
"""
283284
Ingest text chunks into the system.
284285
@@ -308,7 +309,7 @@ async def ingest_chunks_app(
308309
},
309310
)
310311
raw_message["document_id"] = str(document_id)
311-
return raw_message
312+
return raw_message # type: ignore
312313

313314
@staticmethod
314315
def _validate_ingestion_config(ingestion_config):

py/core/providers/kg/postgres.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,7 @@
1616
KGProvider,
1717
Triple,
1818
)
19-
from shared.abstractions import (
20-
KGCreationSettings,
21-
KGEnrichmentSettings,
22-
KGRunType,
23-
)
19+
from shared.abstractions import KGCreationSettings, KGEnrichmentSettings
2420
from shared.api.models.kg.responses import (
2521
KGCreationEstimationResponse,
2622
KGEnrichmentEstimationResponse,
@@ -923,7 +919,7 @@ async def get_entities(
923919
with_description: bool = False,
924920
) -> dict:
925921
conditions = []
926-
params = [collection_id]
922+
params: list = [collection_id]
927923

928924
if entity_ids:
929925
conditions.append(f"id = ANY(${len(params) + 1})")

py/shared/api/models/ingestion/responses.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import TypeVar
1+
from typing import Optional, TypeVar
22
from uuid import UUID
33

44
from pydantic import BaseModel, Field
@@ -13,7 +13,7 @@ class IngestionResponse(BaseModel):
1313
...,
1414
description="A message describing the result of the ingestion request.",
1515
)
16-
task_id: UUID = Field(
16+
task_id: Optional[UUID] = Field(
1717
...,
1818
description="The task ID of the ingestion request.",
1919
)

py/tests/integration/runner.py

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,46 +16,87 @@ def run_command(command):
1616
return result.stdout
1717

1818

19-
def test_ingest_sample_files_cli():
20-
print("Testing: Ingest sample files")
21-
run_command("poetry run r2r ingest-sample-files")
19+
def test_ingest_sample_file_cli():
20+
print("Testing: Ingest sample file CLI")
21+
run_command("poetry run r2r ingest-sample-file")
2222
print("Ingestion successful")
2323

2424

25-
def test_document_ingestion_cli():
26-
print("Testing: Document ingestion")
25+
def test_document_overview_sample_file_cli():
26+
print("Testing: Document overview contains 'aristotle.txt'")
2727
output = run_command("poetry run r2r documents-overview")
28-
documents = json.loads(output)
28+
output = output.replace("'", '"')
29+
output_lines = output.strip().split('\n')[1:]
30+
documents = [json.loads(ele) for ele in output_lines]
2931

30-
expected_document = {
31-
"id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
32+
aristotle_document = {
3233
"title": "aristotle.txt",
3334
"type": "txt",
3435
"ingestion_status": "success",
35-
"kg_extraction_status": "success",
36+
"kg_extraction_status": "pending",
3637
"version": "v0",
3738
"metadata": {"title": "aristotle.txt", "version": "v0"},
3839
}
3940

41+
# Check if any document in the overview matches the Aristotle document
4042
if not any(
41-
all(doc.get(k) == v for k, v in expected_document.items())
43+
all(doc.get(k) == v for k, v in aristotle_document.items())
4244
for doc in documents
4345
):
44-
print("Document ingestion test failed")
45-
print(f"Expected document not found in output: {output}")
46+
print("Document overview test failed")
47+
print("Aristotle document not found in the overview")
4648
sys.exit(1)
47-
print("Document ingestion test passed")
49+
print("Document overview test passed")
4850

49-
50-
def test_vector_search_cli():
51+
def test_vector_search_sample_file_filter_cli():
5152
print("Testing: Vector search")
5253
output = run_command(
53-
"poetry run r2r search --query='What was Uber's profit in 2020?'"
54+
"""poetry run r2r search --query="Who was aristotle?" """
5455
)
55-
results = json.loads(output)
56-
if not results.get("results"):
56+
# Split the output into lines and remove the first and last lines
57+
output_lines = output.strip().split('\n')[1:-1]
58+
# Replace single quotes with double quotes in each line
59+
cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
60+
results = []
61+
for line in cleaned_output_lines:
62+
try:
63+
result = json.loads(line)
64+
results.append(result)
65+
# Skip lines that fail to parse as JSON because of the single-quote replacement
66+
except json.JSONDecodeError:
67+
continue
68+
69+
if not results:
5770
print("Vector search test failed: No results returned")
5871
sys.exit(1)
72+
73+
expected_lead_search_result = {
74+
"extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
75+
"document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
76+
"user_id": "2acb499e-8428-543b-bd85-0d9098718220",
77+
"score": 0.7820796370506287,
78+
"text": """Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science."""
79+
}
80+
lead_result = results[0]
81+
82+
if lead_result['text'] != expected_lead_search_result['text']:
83+
print('Vector search test failed: Incorrect search result text')
84+
print('Expected lead search text:', expected_lead_search_result['text'])
85+
print('Actual lead search text:', lead_result['text'])
86+
sys.exit(1)
87+
88+
if lead_result['extraction_id'] != expected_lead_search_result['extraction_id']:
89+
print("Vector search test failed: Incorrect extraction_id")
90+
print('Expected extraction_id:', expected_lead_search_result['extraction_id'])
91+
print('Actual extraction_id:', lead_result['extraction_id'])
92+
sys.exit(1)
93+
94+
if lead_result['document_id'] != expected_lead_search_result['document_id']:
95+
print("Vector search test failed: Incorrect document_id")
96+
print('Expected document_id:', expected_lead_search_result['document_id'])
97+
print('Actual document_id:', lead_result['document_id'])
98+
sys.exit(1)
99+
59100
print("Vector search test passed")
60101

61102

0 commit comments

Comments
 (0)