@@ -16,46 +16,87 @@ def run_command(command):
1616 return result .stdout
1717
1818
def test_ingest_sample_file_cli():
    """Smoke-test the ``ingest-sample-file`` CLI command.

    The command's output is not inspected; the test only reports success
    once ``run_command`` has returned.
    """
    print("Testing: Ingest sample file CLI")
    ingest_command = "poetry run r2r ingest-sample-file"
    run_command(ingest_command)
    print("Ingestion successful")
2323
2424
def _parse_overview_line(line):
    """Parse one line of ``documents-overview`` output into a dict.

    The CLI may print either JSON or a Python ``repr`` of a dict, so the
    parsers are tried from strictest to most lenient:

    1. ``json.loads`` on the raw line,
    2. ``ast.literal_eval`` on the raw line (handles single-quoted reprs
       without corrupting apostrophes inside values),
    3. ``json.loads`` after swapping ``'`` for ``"`` (the legacy approach).

    Returns:
        The parsed dict, or ``None`` when the line is blank, cannot be
        parsed, or parses to something other than a dict.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    candidate = line.strip()
    if not candidate:
        return None

    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    for parse in parsers:
        try:
            record = parse(candidate)
        except (
            ValueError,  # includes json.JSONDecodeError
            TypeError,
            SyntaxError,
            MemoryError,
            RecursionError,
        ):
            continue
        return record if isinstance(record, dict) else None
    return None


def test_document_overview_sample_file_cli():
    """Check that ``documents-overview`` lists the ingested ``aristotle.txt``.

    Runs the CLI command, parses every output line after the first into a
    dict, and passes when at least one parsed document carries all of the
    expected key/value pairs. On failure the raw CLI output is printed to
    aid debugging and the process exits with status 1.
    """
    print("Testing: Document overview contains 'aristotle.txt'")
    output = run_command("poetry run r2r documents-overview")

    # The first line of the CLI output is skipped, as before; lines that do
    # not parse to a dict are ignored instead of aborting the whole test.
    output_lines = output.strip().split("\n")[1:]
    parsed_lines = (_parse_overview_line(line) for line in output_lines)
    documents = [doc for doc in parsed_lines if doc is not None]

    aristotle_document = {
        "title": "aristotle.txt",
        "type": "txt",
        "ingestion_status": "success",
        "kg_extraction_status": "pending",
        "version": "v0",
        "metadata": {"title": "aristotle.txt", "version": "v0"},
    }

    # Check if any document in the overview matches the Aristotle document
    if not any(
        all(doc.get(k) == v for k, v in aristotle_document.items())
        for doc in documents
    ):
        print("Document overview test failed")
        print("Aristotle document not found in the overview")
        print(f"Raw overview output: {output}")
        sys.exit(1)
    print("Document overview test passed")
4850
49-
def _parse_search_result_line(line):
    """Parse one line of ``search`` CLI output into a result dict.

    Tries ``json.loads`` on the raw line, then ``ast.literal_eval`` (for
    single-quoted Python reprs), and only then the legacy ``'`` -> ``"``
    swap, so a result whose text contains an apostrophe is no longer
    silently discarded.

    Returns:
        The parsed dict, or ``None`` when the line is blank, cannot be
        parsed, or parses to something other than a dict.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    candidate = line.strip()
    if not candidate:
        return None

    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    for parse in parsers:
        try:
            record = parse(candidate)
        except (
            ValueError,  # includes json.JSONDecodeError
            TypeError,
            SyntaxError,
            MemoryError,
            RecursionError,
        ):
            continue
        return record if isinstance(record, dict) else None
    return None


def test_vector_search_sample_file_filter_cli():
    """Check that a vector search for Aristotle returns the expected top hit.

    Runs the ``search`` CLI command, parses the output lines between the
    first and last line into result dicts, and compares the ``text``,
    ``extraction_id`` and ``document_id`` of the first result against the
    expected values. Exits with status 1 on the first mismatch or when no
    result could be parsed.
    """
    print("Testing: Vector search")
    output = run_command(
        """poetry run r2r search --query="Who was aristotle?" """
    )

    # Split the output into lines and drop the first and last lines, which
    # the original test treated as non-result framing.
    output_lines = output.strip().split("\n")[1:-1]
    parsed_lines = (_parse_search_result_line(line) for line in output_lines)
    results = [result for result in parsed_lines if result is not None]

    if not results:
        print("Vector search test failed: No results returned")
        sys.exit(1)

    # Only text, extraction_id and document_id are compared below; user_id
    # and score are kept for reference (the score is a float and is not
    # checked for equality).
    expected_lead_search_result = {
        "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c",
        "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1",
        "user_id": "2acb499e-8428-543b-bd85-0d9098718220",
        "score": 0.7820796370506287,
        "text": (
            "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced "
            "[aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and "
            "polymath. His writings cover a broad range of subjects spanning "
            "the natural sciences, philosophy, linguistics, economics, "
            "politics, psychology, and the arts. As the founder of the "
            "Peripatetic school of philosophy in the Lyceum in Athens, he "
            "began the wider Aristotelian tradition that followed, which set "
            "the groundwork for the development of modern science."
        ),
    }
    lead_result = results[0]

    # (field, failure headline, label used in the expected/actual lines)
    checks = (
        (
            "text",
            "Vector search test failed: Incorrect search result text",
            "lead search text",
        ),
        (
            "extraction_id",
            "Vector search test failed: Incorrect extraction_id",
            "extraction_id",
        ),
        (
            "document_id",
            "Vector search test failed: Incorrect document_id",
            "document_id",
        ),
    )
    for field, failure_message, label in checks:
        expected_value = expected_lead_search_result[field]
        # .get() turns a missing field into a reported mismatch rather
        # than an unhandled KeyError.
        actual_value = lead_result.get(field)
        if actual_value != expected_value:
            print(failure_message)
            print(f"Expected {label}:", expected_value)
            print(f"Actual {label}:", actual_value)
            sys.exit(1)

    print("Vector search test passed")
60101
61102
0 commit comments