Skip to content

Commit 2c0a728

Browse files
authored
Fix splitter when table is only one row wide (#8839)
1 parent f189a1c commit 2c0a728

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

haystack/components/preprocessors/csv_document_splitter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def _split_dataframe(
         df_length = df.shape[0] if axis == "row" else df.shape[1]
         for empty_start_idx, empty_end_idx in split_indices + [(df_length, df_length)]:
             # Avoid empty splits
-            if empty_start_idx - table_start_idx > 1:
+            if empty_start_idx - table_start_idx >= 1:
                 if axis == "row":
                     sub_table = df.iloc[table_start_idx:empty_start_idx]
                 else:

test/components/preprocessors/test_csv_document_splitter.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,12 @@ def test_csv_with_blank_lines(self, splitter: CSVDocumentSplitter) -> None:
             assert table.content == expected_tables[i]
             assert table.meta == expected_meta[i]
 
+    def test_sub_table_with_one_row(self):
+        splitter = CSVDocumentSplitter(row_split_threshold=1)
+        doc = Document(content="""A,B,C\n1,2,3\n,,\n4,5,6""")
+        split_result = splitter.run([doc])
+        assert len(split_result["documents"]) == 2
+
     def test_threshold_no_effect(self, two_tables_sep_by_two_empty_rows: str) -> None:
         splitter = CSVDocumentSplitter(row_split_threshold=3)
         doc = Document(content=two_tables_sep_by_two_empty_rows)

0 commit comments

Comments
 (0)