From b44ce40e1fd16598ab40575b2b270c9483625c56 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty Date: Fri, 4 Oct 2024 15:44:07 -0700 Subject: [PATCH] fix integration suite --- .../integration-test-workflow-debian.yml | 2 +- py/tests/integration/runner.py | 92 +++++++++++++------ 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/.github/workflows/integration-test-workflow-debian.yml b/.github/workflows/integration-test-workflow-debian.yml index ba49fac7b..5b0fb65e9 100644 --- a/.github/workflows/integration-test-workflow-debian.yml +++ b/.github/workflows/integration-test-workflow-debian.yml @@ -84,7 +84,7 @@ jobs: - name: Stop R2R server if: always() - run: pkill -f "r2r serve" + run: ps aux | grep "r2r serve" | awk '{print $2}' | xargs kill || true - name: Uninstall PostgreSQL after tests (Optional) if: always() diff --git a/py/tests/integration/runner.py b/py/tests/integration/runner.py index 21bddb4d2..5ee890fbe 100644 --- a/py/tests/integration/runner.py +++ b/py/tests/integration/runner.py @@ -4,6 +4,20 @@ import subprocess import sys +def compare_result_fields(result, expected_fields): + for field, expected_value in expected_fields.items(): + if callable(expected_value): + if not expected_value(result[field]): + print(f"Test failed: Incorrect {field}") + print(f"Expected {field} to satisfy the condition") + print(f"Actual {field}:", result[field]) + sys.exit(1) + else: + if result[field] != expected_value: + print(f"Test failed: Incorrect {field}") + print(f"Expected {field}:", expected_value) + print(f"Actual {field}:", result[field]) + sys.exit(1) def run_command(command): result = subprocess.run( @@ -20,6 +34,7 @@ def test_ingest_sample_file_cli(): print("Testing: Ingest sample file CLI") run_command("poetry run r2r ingest-sample-file") print("Ingestion successful") + print("~" * 100) def test_document_overview_sample_file_cli(): @@ -47,22 +62,20 @@ def test_document_overview_sample_file_cli(): print("Aristotle document not found in the overview") sys.exit(1) print("Document overview test passed") + print("~" * 100) def test_vector_search_sample_file_filter_cli(): print("Testing: Vector search") output = run_command( - """poetry run r2r search --query="Who was aristotle?" """ + """poetry run r2r search --query="Who was aristotle?" --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'""" ) - # Split the output into lines and remove the first and last lines output_lines = output.strip().split('\n')[1:-1] - # Replace single quotes with double quotes in each line cleaned_output_lines = [line.replace("'", '"') for line in output_lines] results = [] for line in cleaned_output_lines: try: result = json.loads(line) results.append(result) - # Skip lines that are not valid JSON b/c of the single quote replacement except json.JSONDecodeError: continue @@ -70,46 +83,67 @@ def test_vector_search_sample_file_filter_cli(): print("Vector search test failed: No results returned") sys.exit(1) + lead_result = results[0] expected_lead_search_result = { + "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.", "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c", - "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "score": 0.7820796370506287, - "text": """Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.""" + "score": lambda x: 0.77 <= x <= 0.79 } - lead_result = results[0] + compare_result_fields(lead_result, expected_lead_search_result) - if lead_result['text'] != expected_lead_search_result['text']: - print('Vector search test failed: Incorrect search result text') - print('Expected lead search text:', expected_lead_search_result['text']) - print('Actual lead search text:', lead_result['text']) - sys.exit(1) + print("Vector search test passed") - if lead_result['extraction_id'] != expected_lead_search_result['extraction_id']: - print("Vector search test failed: Incorrect extraction_id") - print('Expected extraction_id:', expected_lead_search_result['extraction_id']) - print('Actual extraction_id:', lead_result['extraction_id']) - sys.exit(1) +def test_hybrid_search_sample_file_filter_cli(): + print("Testing: Vector search") + output = run_command( + """poetry run r2r search --query="Who was aristotle?" --use-hybrid-search --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'""" + ) + output_lines = output.strip().split('\n')[1:-1] + cleaned_output_lines = [line.replace("'", '"') for line in output_lines] + results = [] + for line in cleaned_output_lines: + try: + result = json.loads(line) + results.append(result) + except json.JSONDecodeError: + continue - if lead_result['document_id'] != expected_lead_search_result['document_id']: - print("Vector search test failed: Incorrect document_id") - print('Expected document_id:', expected_lead_search_result['document_id']) - print('Actual document_id:', lead_result['document_id']) + if not results: + print("Vector search test failed: No results returned") sys.exit(1) + + # TODO - Fix loading of CLI result to allow comparison below + # (e.g. lead result does not properly load as a dictionary) + # lead_result = results[0] + # expected_lead_search_result = { + # "text": "Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n\nAristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent some time within the Macedonian palace, making his first connections with the Macedonian monarchy.[7]", + # "extraction_id": "f6f5cfb6-8654-5e1c-b574-849a8a313452", + # "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + # "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + # "score": lambda x: 0.016 <= x <= 0.018, + # "full_text_rank": 10, + # "semantic_rank": 5, + # } + # compare_result_fields(lead_result, expected_lead_search_result) print("Vector search test passed") +def test_rag_query_aristotle_birth_year_cli(): + print("Testing: RAG query for Aristotle's birth year") + output = run_command("poetry run r2r rag --query='What year was Aristotle born?'") + # TODO - Can we fix the test to check by loading JSON output? + # response = json.loads(output) -def test_rag_query_cli(): - print("Testing: RAG query") - output = run_command("poetry run r2r rag --query='Who was Aristotle?'") - response = json.loads(output) - if not response.get("answer"): - print("RAG query test failed: No answer returned") + expected_answer = "Aristotle was born in 384 BC" + + if expected_answer not in output: + print(f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'") sys.exit(1) + print("RAG query test passed") - if __name__ == "__main__": if len(sys.argv) < 2: print("Please specify a test function to run")