Skip to content

Commit

Permalink
up
Browse files Browse the repository at this point in the history
  • Loading branch information
emrgnt-cmplxty committed Oct 4, 2024
1 parent 7d02203 commit dfebad3
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 21 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/integration-test-workflow-debian.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@ jobs:
- name: Run integration tests - Ingestion
working-directory: ./py
run: |
poetry run python tests/integration/runner.py test_ingest_sample_file_cli
poetry run python tests/integration/runner.py test_document_overview_sample_file_cli
poetry run python tests/integration/runner.py test_vector_search_sample_file_filter_cli
poetry run python tests/integration/harness_cli.py test_ingest_sample_file_cli
poetry run python tests/integration/harness_cli.py test_document_overview_sample_file_cli
poetry run python tests/integration/harness_cli.py test_vector_search_sample_file_filter_cli
poetry run python tests/integration/harness_cli.py test_ingest_sample_file_sdk
poetry run python tests/integration/harness_cli.py test_reingest_sample_file_sdk
- name: Stop R2R server
if: always()
Expand Down
2 changes: 1 addition & 1 deletion py/core/main/api/ingestion_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ async def ingest_files_app(
file_content,
file_data["content_type"],
)
raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore
raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore
"ingest-files",
{"request": workflow_input},
options={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import subprocess
import sys


def compare_result_fields(result, expected_fields):
for field, expected_value in expected_fields.items():
if callable(expected_value):
Expand All @@ -19,6 +20,7 @@ def compare_result_fields(result, expected_fields):
print(f"Actual {field}:", result[field])
sys.exit(1)


def run_command(command):
result = subprocess.run(
command, shell=True, capture_output=True, text=True
Expand All @@ -41,7 +43,7 @@ def test_document_overview_sample_file_cli():
print("Testing: Document overview contains 'aristotle.txt'")
output = run_command("poetry run r2r documents-overview")
output = output.replace("'", '"')
output_lines = output.strip().split('\n')[1:]
output_lines = output.strip().split("\n")[1:]
documents = [json.loads(ele) for ele in output_lines]

aristotle_document = {
Expand All @@ -64,12 +66,13 @@ def test_document_overview_sample_file_cli():
print("Document overview test passed")
print("~" * 100)


def test_vector_search_sample_file_filter_cli():
print("Testing: Vector search")
output = run_command(
"""poetry run r2r search --query="Who was aristotle?" --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
)
output_lines = output.strip().split('\n')[1:-1]
output_lines = output.strip().split("\n")[1:-1]
cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
results = []
for line in cleaned_output_lines:
Expand All @@ -89,19 +92,20 @@ def test_vector_search_sample_file_filter_cli():
"extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
"document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
"user_id": "2acb499e-8428-543b-bd85-0d9098718220",
"score": lambda x: 0.77 <= x <= 0.79
"score": lambda x: 0.77 <= x <= 0.79,
}
compare_result_fields(lead_result, expected_lead_search_result)

print("Vector search test passed")
print("~" * 100)


def test_hybrid_search_sample_file_filter_cli():
print("Testing: Vector search")
output = run_command(
"""poetry run r2r search --query="Who was aristotle?" --use-hybrid-search --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
)
output_lines = output.strip().split('\n')[1:-1]
output_lines = output.strip().split("\n")[1:-1]
cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
results = []
for line in cleaned_output_lines:
Expand All @@ -114,7 +118,7 @@ def test_hybrid_search_sample_file_filter_cli():
if not results:
print("Vector search test failed: No results returned")
sys.exit(1)

# TODO - Fix loading of CLI result to allow comparison below
# (e.g. lead result does not properly load as a dictionary)
# lead_result = results[0]
Expand All @@ -132,46 +136,65 @@ def test_hybrid_search_sample_file_filter_cli():
print("Vector search test passed")
print("~" * 100)


def test_rag_response_sample_file_cli():
print("Testing: RAG query for Aristotle's birth year")
output = run_command("poetry run r2r rag --query='What year was Aristotle born?'")
output = run_command(
"poetry run r2r rag --query='What year was Aristotle born?'"
)
# TODO - Can we fix the test to check by loading JSON output?
# response = json.loads(output)

expected_answer = "Aristotle was born in 384 BC"

if expected_answer not in output:
print(f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'")
print(
f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'"
)
sys.exit(1)

print("RAG response test passed")
print("~" * 100)


def test_rag_response_stream_sample_file_cli():
print("Testing: Streaming RAG query for who Aristotle was")

# Run the command and capture the output
# output = run_command("poetry run r2r rag --query='who was aristotle' --use-hybrid-search --stream", capture_output=True)
process = subprocess.Popen(
["poetry", "run", "r2r", "rag", "--query='who was aristotle'", "--use-hybrid-search", "--stream"],
[
"poetry",
"run",
"r2r",
"rag",
"--query='who was aristotle'",
"--use-hybrid-search",
"--stream",
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
text=True,
)
output, _ = process.communicate()
output, _ = process.communicate()

# Check if the output contains the search and completion tags
if "<search>" not in output or "</search>" not in output:
print("Streaming RAG query test failed: Search results not found in output")
print(
"Streaming RAG query test failed: Search results not found in output"
)
sys.exit(1)

if "<completion>" not in output or "</completion>" not in output:
print("Streaming RAG query test failed: Completion not found in output")
print(
"Streaming RAG query test failed: Completion not found in output"
)
sys.exit(1)

print("RAG response stream test passed")
print("~" * 100)


if __name__ == "__main__":
if len(sys.argv) < 2:
print("Please specify a test function to run")
Expand Down
194 changes: 194 additions & 0 deletions py/tests/integration/harness_sdk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import json
import sys

from r2r import R2RClient

# Module-level SDK client shared by the test functions in this file; it is
# constructed with R2RClient's default arguments.
client = R2RClient()


def compare_result_fields(result, expected_fields):
    """Check ``result`` against ``expected_fields``, exiting on the first failure.

    Args:
        result: Mapping of field name to the actual value.
        expected_fields: Mapping of field name to either the exact expected
            value or a callable predicate; a predicate receives the actual
            value and must return a truthy result for the field to pass.

    Returns:
        None when every expected field passes.

    Raises:
        SystemExit: With exit code 1 (via ``sys.exit``) after printing a
            diagnostic for the first field that is missing or mismatched.
    """
    for field, expected_value in expected_fields.items():
        try:
            actual_value = result[field]
        except KeyError:
            # Report a missing field the same way as any other failure
            # instead of surfacing a bare KeyError traceback.
            print(f"Test failed: Missing {field}")
            print("Available fields:", list(result))
            sys.exit(1)

        if callable(expected_value):
            if not expected_value(actual_value):
                print(f"Test failed: Incorrect {field}")
                print(f"Expected {field} to satisfy the condition")
                print(f"Actual {field}:", actual_value)
                sys.exit(1)
        elif actual_value != expected_value:
            print(f"Test failed: Incorrect {field}")
            print(f"Expected {field}:", expected_value)
            print(f"Actual {field}:", actual_value)
            sys.exit(1)


def test_ingest_sample_file_sdk():
    """Ingest the sample PDF through the SDK client.

    Exits with status 1 when the ``"results"` entry of the ingestion
    response is falsy.
    """
    print("Testing: Ingest sample file SDK")
    response = client.ingest_files(
        file_paths=["core/examples/data/uber_2021.pdf"]
    )

    if response["results"]:
        print("Ingestion successful")
        print("~" * 100)
        return

    print("Ingestion test failed")
    sys.exit(1)


def test_reingest_sample_file_sdk():
    """Verify that ingesting the sample PDF a second time is rejected.

    The test passes only when ``client.ingest_files`` raises an exception
    whose message contains the version-increment error text. A successful
    ingestion, or an exception with any other message, prints a diagnostic
    and exits with status 1.
    """
    print("Testing: Re-ingest sample file SDK")
    file_paths = ["core/examples/data/uber_2021.pdf"]
    expected_error = (
        "Must increment version number before attempting to overwrite document"
    )

    try:
        client.ingest_files(file_paths=file_paths)
    except Exception as e:
        # The concrete exception type raised by the client is not visible
        # here, so the failure is identified by its message.
        error_message = str(e)
        if expected_error not in error_message:
            print(
                f"Re-ingestion test failed: Unexpected error - {error_message}"
            )
            sys.exit(1)
        print("Re-ingestion failed as expected")
    else:
        # Only the ingestion call is guarded; reaching this branch means
        # the duplicate upload was accepted.
        print(
            "Re-ingestion test failed: Expected an error but ingestion succeeded"
        )
        sys.exit(1)

    print("Re-ingestion test passed")
    print("~" * 100)


def test_document_overview_sample_file_sdk():
    """Verify that the documents overview lists the ingested aristotle.txt.

    A document matches when every key in the expected record compares equal
    to the same key in an overview entry. Exits with status 1 when no entry
    matches.
    """
    print("Testing: Document overview contains 'aristotle.txt'")
    documents_overview = client.documents_overview()

    # NOTE(review): the ingestion response in this file is indexed with
    # ["results"]; presumably the overview is wrapped the same way — confirm
    # against the SDK. Iterating a dict would yield its string keys, so
    # unwrap it before matching; a plain list is left untouched.
    if isinstance(documents_overview, dict):
        documents_overview = documents_overview.get("results", [])

    aristotle_document = {
        "title": "aristotle.txt",
        "type": "txt",
        "ingestion_status": "success",
        "kg_extraction_status": "pending",
        "version": "v0",
        "metadata": {"title": "aristotle.txt", "version": "v0"},
    }

    if not any(
        all(doc.get(k) == v for k, v in aristotle_document.items())
        for doc in documents_overview
    ):
        print("Document overview test failed")
        print("Aristotle document not found in the overview")
        sys.exit(1)
    print("Document overview test passed")
    print("~" * 100)


def test_vector_search_sample_file_filter_sdk():
    """Run a document-filtered search and validate the first hit.

    Exits with status 1 when the search returns nothing; otherwise the
    first result is checked field by field with ``compare_result_fields``.
    """
    print("Testing: Vector search")
    document_filter = {
        "document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}
    }
    hits = client.search(query="Who was aristotle?", filters=document_filter)

    if not hits:
        print("Vector search test failed: No results returned")
        sys.exit(1)

    top_hit = hits[0]
    compare_result_fields(
        top_hit,
        {
            "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.",
            "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
            "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
            "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
            # The score only has to fall inside this inclusive range.
            "score": lambda score: 0.77 <= score <= 0.79,
        },
    )

    print("Vector search test passed")
    print("~" * 100)


def test_hybrid_search_sample_file_filter_sdk():
    """Run a document-filtered search with hybrid search enabled.

    The request sets ``use_hybrid_search`` in the vector search settings.
    Exits with status 1 when nothing is returned; otherwise the first
    result is validated with ``compare_result_fields``.
    """
    print("Testing: Hybrid search")
    search_kwargs = {
        "query": "Who was aristotle?",
        "vector_search_settings": {"use_hybrid_search": True},
        "filters": {
            "document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}
        },
    }
    matches = client.search(**search_kwargs)

    if not matches:
        print("Hybrid search test failed: No results returned")
        sys.exit(1)

    first_match = matches[0]
    expected = {
        "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.",
        "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
        "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
        "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
        # Accept any score inside this inclusive range.
        "score": lambda value: 0.77 <= value <= 0.79,
    }
    compare_result_fields(first_match, expected)

    print("Hybrid search test passed")
    print("~" * 100)


def test_rag_response_sample_file_sdk():
    """Ask the RAG endpoint for Aristotle's birth year and check the answer.

    Exits with status 1 when the expected sentence does not appear in the
    response text.
    """
    print("Testing: RAG query for Aristotle's birth year")
    response = client.rag(query="What year was Aristotle born?")

    expected_answer = "Aristotle was born in 384 BC"

    # `in` on a mapping tests its keys, not its contents, so a non-string
    # response is searched through its textual form instead. A plain string
    # response is used unchanged.
    # NOTE(review): the exact response shape of client.rag is not visible
    # here — confirm against the SDK.
    response_text = response if isinstance(response, str) else str(response)

    if expected_answer not in response_text:
        print(
            f"RAG query test failed: Expected answer '{expected_answer}' not found in '{response}'"
        )
        sys.exit(1)

    print("RAG response test passed")
    print("~" * 100)


def test_rag_response_stream_sample_file_sdk():
    """Stream an agent answer and check it carries search and completion tags.

    All streamed chunks are concatenated; the test exits with status 1 when
    either the ``<search>`` pair or the ``<completion>`` pair is missing
    from the combined text.
    """
    print("Testing: Streaming RAG query for who Aristotle was")

    stream = client.agent(
        messages=[{"role": "user", "content": "who was aristotle"}],
        vector_search_settings={"use_hybrid_search": True},
        rag_generation_config={"stream": True},
    )

    # Drain the stream into one string before inspecting it.
    output = "".join(chunk for chunk in stream)

    has_search = "<search>" in output and "</search>" in output
    if not has_search:
        print(
            "Streaming RAG query test failed: Search results not found in output"
        )
        sys.exit(1)

    has_completion = "<completion>" in output and "</completion>" in output
    if not has_completion:
        print(
            "Streaming RAG query test failed: Completion not found in output"
        )
        sys.exit(1)

    print("RAG response stream test passed")
    print("~" * 100)


# Script entry point: run the test function named by the first CLI argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Please specify a test function to run")
        sys.exit(1)

    test_function = sys.argv[1]
    # Resolve the name explicitly so a typo (or a name bound to something
    # that is not callable) produces a readable message and a non-zero exit
    # status instead of a KeyError/TypeError traceback.
    selected_test = globals().get(test_function)
    if not callable(selected_test):
        print(f"Unknown test function: {test_function}")
        sys.exit(1)

    selected_test()

0 comments on commit dfebad3

Please sign in to comment.