Skip to content

Commit

Permalink
fix integration suite
Browse files Browse the repository at this point in the history
  • Loading branch information
emrgnt-cmplxty committed Oct 4, 2024
1 parent 453623d commit b44ce40
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integration-test-workflow-debian.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
- name: Stop R2R server
if: always()
run: pkill -f "r2r serve"
run: ps aux | grep "r2r serve" | awk '{print $2}' | xargs kill || true

- name: Uninstall PostgreSQL after tests (Optional)
if: always()
Expand Down
92 changes: 63 additions & 29 deletions py/tests/integration/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
import subprocess
import sys

def compare_result_fields(result, expected_fields):
    """Check selected fields of ``result`` and exit the process on a mismatch.

    Args:
        result: The parsed search result, indexed by field name.
        expected_fields: Mapping of field name to either a literal expected
            value (compared with ``!=``) or a callable predicate that is given
            the actual value and returns a truthy result when it is acceptable.

    Returns:
        None when every expected field is present and satisfies its
        expectation.

    Raises:
        SystemExit: With exit code 1, after printing a diagnostic, on the
            first field that is missing from ``result`` or that does not
            meet its expectation.
    """
    for field, expected_value in expected_fields.items():
        try:
            actual_value = result[field]
        except (KeyError, IndexError, TypeError):
            # A missing key, or a result that is not a mapping at all, is
            # reported as a test failure instead of an unhandled traceback.
            print(f"Test failed: Missing {field}")
            print("Actual result:", result)
            sys.exit(1)

        if callable(expected_value):
            # Predicates allow tolerant checks, e.g. a score within a range.
            if not expected_value(actual_value):
                print(f"Test failed: Incorrect {field}")
                print(f"Expected {field} to satisfy the condition")
                print(f"Actual {field}:", actual_value)
                sys.exit(1)
        elif actual_value != expected_value:
            print(f"Test failed: Incorrect {field}")
            print(f"Expected {field}:", expected_value)
            print(f"Actual {field}:", actual_value)
            sys.exit(1)

def run_command(command):
result = subprocess.run(
Expand All @@ -20,6 +34,7 @@ def test_ingest_sample_file_cli():
print("Testing: Ingest sample file CLI")
run_command("poetry run r2r ingest-sample-file")
print("Ingestion successful")
print("~" * 100)


def test_document_overview_sample_file_cli():
Expand Down Expand Up @@ -47,69 +62,88 @@ def test_document_overview_sample_file_cli():
print("Aristotle document not found in the overview")
sys.exit(1)
print("Document overview test passed")
print("~" * 100)

def test_vector_search_sample_file_filter_cli():
    """Run a document-filtered vector search via the CLI and check the top hit.

    Invokes ``r2r search`` restricted to a single ``document_id``, parses the
    printed result lines, and compares the first parsed result against the
    expected fields with ``compare_result_fields``. Exits the process with
    code 1 when no result line can be parsed; field mismatches are handled by
    ``compare_result_fields``.
    """
    print("Testing: Vector search")
    # NOTE(review): assumes run_command returns the command's stdout as a
    # string -- confirm against its definition.
    output = run_command(
        """poetry run r2r search --query="Who was aristotle?" --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
    )
    # Split the output into lines and remove the first and last lines
    output_lines = output.strip().split('\n')[1:-1]
    # Replace single quotes with double quotes in each line
    cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
    results = []
    for line in cleaned_output_lines:
        try:
            result = json.loads(line)
            results.append(result)
        # Skip lines that are not valid JSON b/c of the single quote replacement
        except json.JSONDecodeError:
            continue

    if not results:
        print("Vector search test failed: No results returned")
        sys.exit(1)

    expected_lead_search_result = {
        "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.",
        "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
        "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
        "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
        # The score is checked against a range rather than an exact float.
        "score": lambda x: 0.77 <= x <= 0.79,
    }
    lead_result = results[0]
    compare_result_fields(lead_result, expected_lead_search_result)

    print("Vector search test passed")
    print("~" * 100)
def test_hybrid_search_sample_file_filter_cli():
    """Run a document-filtered hybrid search via the CLI and require a result.

    Invokes ``r2r search`` with ``--use-hybrid-search`` restricted to a single
    ``document_id`` and parses the printed result lines. Exits the process
    with code 1 when no result line can be parsed. Field-level comparison of
    the lead result is not performed yet (see the TODO in the body).
    """
    print("Testing: Hybrid search")
    # NOTE(review): assumes run_command returns the command's stdout as a
    # string -- confirm against its definition.
    output = run_command(
        """poetry run r2r search --query="Who was aristotle?" --use-hybrid-search --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
    )
    # Drop the first and last lines of the output, then swap single quotes
    # for double quotes so each remaining line can be attempted as JSON.
    output_lines = output.strip().split('\n')[1:-1]
    cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
    results = []
    for line in cleaned_output_lines:
        try:
            result = json.loads(line)
            results.append(result)
        # Lines that are not valid JSON after the quote replacement are skipped.
        except json.JSONDecodeError:
            continue

    if not results:
        print("Hybrid search test failed: No results returned")
        sys.exit(1)

    # TODO - Fix loading of CLI result to allow comparison below
    # (e.g. lead result does not properly load as a dictionary)
    # lead_result = results[0]
    # expected_lead_search_result = {
    #     "text": "Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n\nAristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent some time within the Macedonian palace, making his first connections with the Macedonian monarchy.[7]",
    #     "extraction_id": "f6f5cfb6-8654-5e1c-b574-849a8a313452",
    #     "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
    #     "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
    #     "score": lambda x: 0.016 <= x <= 0.018,
    #     "full_text_rank": 10,
    #     "semantic_rank": 5,
    # }
    # compare_result_fields(lead_result, expected_lead_search_result)

    print("Hybrid search test passed")

def test_rag_query_aristotle_birth_year_cli():
    """Ask the RAG CLI for Aristotle's birth year and check the raw output.

    Invokes ``r2r rag`` and passes when the expected answer sentence appears
    anywhere in the command output. Exits the process with code 1 otherwise.
    """
    print("Testing: RAG query for Aristotle's birth year")
    # NOTE(review): assumes run_command returns the command's stdout as a
    # string -- confirm against its definition.
    output = run_command("poetry run r2r rag --query='What year was Aristotle born?'")
    # TODO - Can we fix the test to check by loading JSON output?
    # response = json.loads(output)

    expected_answer = "Aristotle was born in 384 BC"

    # Substring match on the raw text, since the output is not parsed as JSON.
    if expected_answer not in output:
        print(f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'")
        sys.exit(1)

    print("RAG query test passed")


if __name__ == "__main__":
if len(sys.argv) < 2:
print("Please specify a test function to run")
Expand Down

0 comments on commit b44ce40

Please sign in to comment.