Skip to content

Commit

Permalink
More complicated test
Browse files Browse the repository at this point in the history
  • Loading branch information
sjrl committed Feb 7, 2025
1 parent 4ecaea6 commit 1003904
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion test/components/preprocessors/test_csv_document_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_column_split(self, splitter: CSVDocumentSplitter, two_tables_sep_by_two
for i, table in enumerate(result):
assert table.content == expected_tables[i]

def test_recursive_split(self, splitter: CSVDocumentSplitter) -> None:
def test_recursive_split_one_level(self, splitter: CSVDocumentSplitter) -> None:
csv_content = """A,B,,,X,Y
1,2,,,7,8
,,,,,
Expand All @@ -143,6 +143,21 @@ def test_recursive_split(self, splitter: CSVDocumentSplitter) -> None:
for i, table in enumerate(result):
assert table.content == expected_tables[i]

def test_recursive_split_two_levels(self, splitter: CSVDocumentSplitter) -> None:
    """Check that a sheet holding three separate tables yields three documents.

    The input has an ``A,B`` table and a ``P,Q`` table stacked in the two
    left-hand columns, and a four-row ``X,Y`` / ``M,N`` table in the two
    right-hand columns, with two empty columns in between. The test expects
    the two left-hand tables first, followed by the right-hand table.
    """
    # The CSV rows must stay flush-left: they are part of the string literal.
    csv_content = """A,B,,,X,Y
1,2,,,7,8
,,,,M,N
,,,,9,10
P,Q,,,,
3,4,,,,
"""
    expected_tables = ["A,B\n1,2\n", "P,Q\n3,4\n", "X,Y\n7,8\nM,N\n9,10\n"]

    documents = splitter.run([Document(content=csv_content)])["documents"]

    # The length is asserted first so the pairwise comparison below cannot
    # silently skip a missing or extra table.
    assert len(documents) == 3
    for document, expected_content in zip(documents, expected_tables):
        assert document.content == expected_content

def test_threshold_no_effect(self, two_tables_sep_by_two_empty_rows: str) -> None:
splitter = CSVDocumentSplitter(row_split_threshold=3)
doc = Document(content=two_tables_sep_by_two_empty_rows)
Expand Down

0 comments on commit 1003904

Please sign in to comment.