diff --git a/test/components/preprocessors/test_csv_document_splitter.py b/test/components/preprocessors/test_csv_document_splitter.py index e29e5f423e..bf44357e88 100644 --- a/test/components/preprocessors/test_csv_document_splitter.py +++ b/test/components/preprocessors/test_csv_document_splitter.py @@ -128,7 +128,7 @@ def test_column_split(self, splitter: CSVDocumentSplitter, two_tables_sep_by_two for i, table in enumerate(result): assert table.content == expected_tables[i] - def test_recursive_split(self, splitter: CSVDocumentSplitter) -> None: + def test_recursive_split_one_level(self, splitter: CSVDocumentSplitter) -> None: csv_content = """A,B,,,X,Y 1,2,,,7,8 ,,,,, @@ -143,6 +143,21 @@ def test_recursive_split(self, splitter: CSVDocumentSplitter) -> None: for i, table in enumerate(result): assert table.content == expected_tables[i] + def test_recursive_split_two_levels(self, splitter: CSVDocumentSplitter) -> None: + csv_content = """A,B,,,X,Y +1,2,,,7,8 +,,,,M,N +,,,,9,10 +P,Q,,,, +3,4,,,, +""" + doc = Document(content=csv_content) + result = splitter.run([doc])["documents"] + assert len(result) == 3 + expected_tables = ["A,B\n1,2\n", "P,Q\n3,4\n", "X,Y\n7,8\nM,N\n9,10\n"] + for i, table in enumerate(result): + assert table.content == expected_tables[i] + def test_threshold_no_effect(self, two_tables_sep_by_two_empty_rows: str) -> None: splitter = CSVDocumentSplitter(row_split_threshold=3) doc = Document(content=two_tables_sep_by_two_empty_rows)