up

SciPhi-AI · Oct 4, 2024 · dfebad3 · dfebad3
1 parent 7d02203
commit dfebad3
Show file tree

Hide file tree

Showing 4 changed files with 241 additions and 21 deletions.
diff --git a/.github/workflows/integration-test-workflow-debian.yml b/.github/workflows/integration-test-workflow-debian.yml
@@ -78,9 +78,12 @@ jobs:
     - name: Run integration tests - Ingestion
       working-directory: ./py
       run: |
-        poetry run python tests/integration/runner.py test_ingest_sample_file_cli
-        poetry run python tests/integration/runner.py test_document_overview_sample_file_cli
-        poetry run python tests/integration/runner.py test_vector_search_sample_file_filter_cli
+        poetry run python tests/integration/harness_cli.py test_ingest_sample_file_cli
+        poetry run python tests/integration/harness_cli.py test_document_overview_sample_file_cli
+        poetry run python tests/integration/harness_cli.py test_vector_search_sample_file_filter_cli
+        # SDK tests are defined in harness_sdk.py, not harness_cli.py
+        poetry run python tests/integration/harness_sdk.py test_ingest_sample_file_sdk
+        poetry run python tests/integration/harness_sdk.py test_reingest_sample_file_sdk
 
     - name: Stop R2R server
       if: always()

diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py
@@ -149,7 +149,7 @@ async def ingest_files_app(
                     file_content,
                     file_data["content_type"],
                 )
-                raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore
+                raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow(  # type: ignore
                     "ingest-files",
                     {"request": workflow_input},
                     options={

diff --git a/py/tests/integration/runner.py → py/tests/integration/harness_cli.py b/py/tests/integration/runner.py → py/tests/integration/harness_cli.py
@@ -4,6 +4,7 @@
 import subprocess
 import sys
 
+
 def compare_result_fields(result, expected_fields):
     for field, expected_value in expected_fields.items():
         if callable(expected_value):
@@ -19,6 +20,7 @@ def compare_result_fields(result, expected_fields):
                 print(f"Actual {field}:", result[field])
                 sys.exit(1)
 
+
 def run_command(command):
     result = subprocess.run(
         command, shell=True, capture_output=True, text=True
@@ -41,7 +43,7 @@ def test_document_overview_sample_file_cli():
     print("Testing: Document overview contains 'aristotle.txt'")
     output = run_command("poetry run r2r documents-overview")
     output = output.replace("'", '"')
-    output_lines = output.strip().split('\n')[1:]
+    output_lines = output.strip().split("\n")[1:]
     documents = [json.loads(ele) for ele in output_lines]
 
     aristotle_document = {
@@ -64,12 +66,13 @@ def test_document_overview_sample_file_cli():
     print("Document overview test passed")
     print("~" * 100)
 
+
 def test_vector_search_sample_file_filter_cli():
     print("Testing: Vector search")
     output = run_command(
         """poetry run r2r search --query="Who was aristotle?" --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
     )
-    output_lines = output.strip().split('\n')[1:-1]
+    output_lines = output.strip().split("\n")[1:-1]
     cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
     results = []
     for line in cleaned_output_lines:
@@ -89,19 +92,20 @@ def test_vector_search_sample_file_filter_cli():
         "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
         "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
         "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
-        "score": lambda x: 0.77 <= x <= 0.79
+        "score": lambda x: 0.77 <= x <= 0.79,
     }
     compare_result_fields(lead_result, expected_lead_search_result)
 
     print("Vector search test passed")
     print("~" * 100)
 
+
 def test_hybrid_search_sample_file_filter_cli():
     print("Testing: Vector search")
     output = run_command(
         """poetry run r2r search --query="Who was aristotle?" --use-hybrid-search --filters='{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}'"""
     )
-    output_lines = output.strip().split('\n')[1:-1]
+    output_lines = output.strip().split("\n")[1:-1]
     cleaned_output_lines = [line.replace("'", '"') for line in output_lines]
     results = []
     for line in cleaned_output_lines:
@@ -114,7 +118,7 @@ def test_hybrid_search_sample_file_filter_cli():
     if not results:
         print("Vector search test failed: No results returned")
         sys.exit(1)
-        
+
     # TODO - Fix loading of CLI result to allow comparison below
     # (e.g. lead result does not properly load as a dictionary)
     # lead_result = results[0]
@@ -132,46 +136,65 @@ def test_hybrid_search_sample_file_filter_cli():
     print("Vector search test passed")
     print("~" * 100)
 
+
 def test_rag_response_sample_file_cli():
     print("Testing: RAG query for Aristotle's birth year")
-    output = run_command("poetry run r2r rag --query='What year was Aristotle born?'")
+    output = run_command(
+        "poetry run r2r rag --query='What year was Aristotle born?'"
+    )
     # TODO - Can we fix the test to check by loading JSON output?
     # response = json.loads(output)
 
     expected_answer = "Aristotle was born in 384 BC"
-    
+
     if expected_answer not in output:
-        print(f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'")
+        print(
+            f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'"
+        )
         sys.exit(1)
-    
+
     print("RAG response test passed")
     print("~" * 100)
 
+
 def test_rag_response_stream_sample_file_cli():
     print("Testing: Streaming RAG query for who Aristotle was")
-    
+
     # Run the command and capture the output
     # output = run_command("poetry run r2r rag --query='who was aristotle' --use-hybrid-search --stream", capture_output=True)
     process = subprocess.Popen(
-        ["poetry", "run", "r2r", "rag", "--query='who was aristotle'", "--use-hybrid-search", "--stream"],
+        [
+            "poetry",
+            "run",
+            "r2r",
+            "rag",
+            "--query='who was aristotle'",
+            "--use-hybrid-search",
+            "--stream",
+        ],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
-        text=True
+        text=True,
     )
-    output, _ = process.communicate()    
-    
+    output, _ = process.communicate()
+
     # Check if the output contains the search and completion tags
     if "<search>" not in output or "</search>" not in output:
-        print("Streaming RAG query test failed: Search results not found in output")
+        print(
+            "Streaming RAG query test failed: Search results not found in output"
+        )
         sys.exit(1)
-    
+
     if "<completion>" not in output or "</completion>" not in output:
-        print("Streaming RAG query test failed: Completion not found in output")
+        print(
+            "Streaming RAG query test failed: Completion not found in output"
+        )
         sys.exit(1)
 
     print("RAG response stream test passed")
     print("~" * 100)
 
+
 if __name__ == "__main__":
     if len(sys.argv) < 2:
         print("Please specify a test function to run")

diff --git a/py/tests/integration/harness_sdk.py b/py/tests/integration/harness_sdk.py
@@ -0,0 +1,194 @@
+import json
+import sys
+
+from r2r import R2RClient
+
+# Module-level SDK client shared by every test function in this harness.
+# It is constructed with no arguments as soon as the module is loaded.
+# NOTE(review): presumably this relies on the client's default server
+# address pointing at the locally started R2R server — confirm.
+client = R2RClient()
+
+
+def compare_result_fields(result, expected_fields):
+    """Validate selected fields of ``result`` and abort on the first mismatch.
+
+    Args:
+        result: Mapping that is indexed with every key of ``expected_fields``.
+        expected_fields: Maps a field name to either a literal expected value
+            or a callable predicate that receives the actual value.
+
+    Returns:
+        None when every field passes.
+
+    Exits:
+        Prints a diagnostic and calls ``sys.exit(1)`` on the first field whose
+        predicate is falsy or whose value differs from the expected literal.
+    """
+    for field, expected_value in expected_fields.items():
+        actual = result[field]
+        is_predicate = callable(expected_value)
+
+        # A predicate fails when it returns something falsy; a literal fails
+        # when the actual value compares unequal to it.
+        if is_predicate:
+            mismatch = not expected_value(actual)
+        else:
+            mismatch = actual != expected_value
+
+        if not mismatch:
+            continue
+
+        print(f"Test failed: Incorrect {field}")
+        if is_predicate:
+            print(f"Expected {field} to satisfy the condition")
+        else:
+            print(f"Expected {field}:", expected_value)
+        print(f"Actual {field}:", actual)
+        sys.exit(1)
+
+
+def test_ingest_sample_file_sdk():
+    """Ingest the sample Uber PDF through the SDK client.
+
+    The file path is relative, so the script must be started from a
+    directory that contains ``core/examples/data``.
+
+    Exits:
+        Calls ``sys.exit(1)`` when the response's ``"results"`` entry is
+        falsy.
+    """
+    print("Testing: Ingest sample file SDK")
+    response = client.ingest_files(
+        file_paths=["core/examples/data/uber_2021.pdf"]
+    )
+
+    # A truthy "results" entry is the only success criterion checked here.
+    if response["results"]:
+        print("Ingestion successful")
+        print("~" * 100)
+        return
+
+    print("Ingestion test failed")
+    sys.exit(1)
+
+
+def test_reingest_sample_file_sdk():
+    """Verify that ingesting the sample Uber PDF a second time is rejected.
+
+    The test passes only when ``client.ingest_files`` raises an exception
+    whose message contains the "must increment version number" text.
+
+    Exits:
+        Calls ``sys.exit(1)`` when ingestion unexpectedly succeeds or when
+        it fails with a different error message.
+    """
+    print("Testing: Re-ingest sample file SDK")
+    file_paths = ["core/examples/data/uber_2021.pdf"]
+    expected_error = (
+        "Must increment version number before attempting to overwrite document"
+    )
+
+    try:
+        client.ingest_files(file_paths=file_paths)
+    except Exception as e:
+        # The concrete exception type raised by the client is not visible
+        # here, so the error is identified by its message text.
+        error_message = str(e)
+        if expected_error not in error_message:
+            print(
+                f"Re-ingestion test failed: Unexpected error - {error_message}"
+            )
+            sys.exit(1)
+        print("Re-ingestion failed as expected")
+    else:
+        # No exception means the duplicate document was accepted.
+        print(
+            "Re-ingestion test failed: Expected an error but ingestion succeeded"
+        )
+        sys.exit(1)
+
+    print("Re-ingestion test passed")
+    print("~" * 100)
+
+
+def test_document_overview_sample_file_sdk():
+    """Check that the documents overview lists the ingested aristotle.txt.
+
+    A document matches when every key of the expected record is present in
+    the overview entry with an equal value; extra keys are ignored.
+
+    Exits:
+        Calls ``sys.exit(1)`` when no overview entry matches.
+    """
+    print("Testing: Document overview contains 'aristotle.txt'")
+    documents_overview = client.documents_overview()
+
+    aristotle_document = {
+        "title": "aristotle.txt",
+        "type": "txt",
+        "ingestion_status": "success",
+        "kg_extraction_status": "pending",
+        "version": "v0",
+        "metadata": {"title": "aristotle.txt", "version": "v0"},
+    }
+
+    # NOTE(review): this iterates the response directly and calls .get() on
+    # each item, so it assumes a sequence of dicts. test_ingest_sample_file_sdk
+    # reads its response through ["results"] — confirm the shape returned here.
+    if not any(
+        all(doc.get(k) == v for k, v in aristotle_document.items())
+        for doc in documents_overview
+    ):
+        print("Document overview test failed")
+        print("Aristotle document not found in the overview")
+        sys.exit(1)
+    print("Document overview test passed")
+    print("~" * 100)
+
+
+def test_vector_search_sample_file_filter_sdk():
+    """Run a document-filtered search and validate the first hit.
+
+    The first result must carry the expected text and identifiers, and its
+    score must fall within [0.77, 0.79].
+
+    Exits:
+        Calls ``sys.exit(1)`` when the search returns nothing; field
+        mismatches exit through ``compare_result_fields``.
+    """
+    print("Testing: Vector search")
+    document_filter = {
+        "document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}
+    }
+    hits = client.search(query="Who was aristotle?", filters=document_filter)
+
+    if not hits:
+        print("Vector search test failed: No results returned")
+        sys.exit(1)
+
+    # NOTE(review): indexing with [0] assumes the client returns a sequence
+    # of hit dicts — confirm against the SDK's response shape.
+    top_hit = hits[0]
+    expected_top_hit = {
+        "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.",
+        "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
+        "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
+        "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
+        "score": lambda x: 0.77 <= x <= 0.79,
+    }
+    compare_result_fields(top_hit, expected_top_hit)
+
+    print("Vector search test passed")
+    print("~" * 100)
+
+
+def test_hybrid_search_sample_file_filter_sdk():
+    """Run a document-filtered hybrid search and validate the first hit.
+
+    Identical to the plain vector search check except that
+    ``use_hybrid_search`` is enabled in the vector search settings.
+
+    Exits:
+        Calls ``sys.exit(1)`` when the search returns nothing; field
+        mismatches exit through ``compare_result_fields``.
+    """
+    print("Testing: Hybrid search")
+    document_filter = {
+        "document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}
+    }
+    hits = client.search(
+        query="Who was aristotle?",
+        vector_search_settings={"use_hybrid_search": True},
+        filters=document_filter,
+    )
+
+    if not hits:
+        print("Hybrid search test failed: No results returned")
+        sys.exit(1)
+
+    # NOTE(review): indexing with [0] assumes the client returns a sequence
+    # of hit dicts — confirm against the SDK's response shape.
+    top_hit = hits[0]
+    expected_top_hit = {
+        "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.",
+        "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
+        "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
+        "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
+        "score": lambda x: 0.77 <= x <= 0.79,
+    }
+    compare_result_fields(top_hit, expected_top_hit)
+
+    print("Hybrid search test passed")
+    print("~" * 100)
+
+
+def test_rag_response_sample_file_sdk():
+    """Ask the RAG endpoint for Aristotle's birth year and check the answer.
+
+    Exits:
+        Calls ``sys.exit(1)`` when the expected sentence is not contained in
+        the response.
+    """
+    print("Testing: RAG query for Aristotle's birth year")
+    expected_answer = "Aristotle was born in 384 BC"
+    response = client.rag(query="What year was Aristotle born?")
+
+    # NOTE(review): the `in` test is a substring check only if the client
+    # returns a string — confirm the response type.
+    if expected_answer in response:
+        print("RAG response test passed")
+        print("~" * 100)
+        return
+
+    print(
+        f"RAG query test failed: Expected answer '{expected_answer}' not found in '{response}'"
+    )
+    sys.exit(1)
+
+
+def test_rag_response_stream_sample_file_sdk():
+    """Stream an agent answer and check it contains search and completion tags.
+
+    All streamed chunks are concatenated, then the text must contain both
+    the opening and closing ``search`` and ``completion`` tags.
+
+    Exits:
+        Calls ``sys.exit(1)`` when either tag pair is incomplete.
+    """
+    print("Testing: Streaming RAG query for who Aristotle was")
+
+    chunks = client.agent(
+        messages=[{"role": "user", "content": "who was aristotle"}],
+        vector_search_settings={"use_hybrid_search": True},
+        rag_generation_config={"stream": True},
+    )
+
+    # Concatenation assumes every streamed chunk is a string.
+    output = "".join(chunks)
+
+    # Checked in order: search results first, then the completion.
+    required_sections = (
+        (
+            "<search>",
+            "</search>",
+            "Streaming RAG query test failed: Search results not found in output",
+        ),
+        (
+            "<completion>",
+            "</completion>",
+            "Streaming RAG query test failed: Completion not found in output",
+        ),
+    )
+    for opening_tag, closing_tag, failure_message in required_sections:
+        if not (opening_tag in output and closing_tag in output):
+            print(failure_message)
+            sys.exit(1)
+
+    print("RAG response stream test passed")
+    print("~" * 100)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: run the single test function named by the
+    # first argument, e.g. `python harness_sdk.py test_ingest_sample_file_sdk`.
+    if len(sys.argv) < 2:
+        print("Please specify a test function to run")
+        sys.exit(1)
+
+    test_function = sys.argv[1]
+    # Resolve the name among this module's globals. A mistyped name, or one
+    # that lives in a different harness file, gets a clear message and a
+    # non-zero exit instead of a bare KeyError traceback.
+    test_callable = globals().get(test_function)
+    if not callable(test_callable):
+        print(f"Unknown test function: {test_function}")
+        sys.exit(1)
+    test_callable()