Skip to content

Commit 394ee3d

Browse files
authored
Ignore KV elements in LAYOUT_LIST
2 parents 7218d8e + 4fb3796 commit 394ee3d

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

textractor/entities/layout.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ def get_text_and_words(
139 139   final_text = add_id_to_html_tag(config.list_layout_prefix, self.id, config)
140 140   final_words = []
141 141   for i, child in enumerate(
142     - sorted(self.children, key=lambda x: x.reading_order)
    142 + sorted(filter(lambda c: isinstance(c, Layout), self.children), key=lambda x: x.reading_order)
143 143   ):
144 144   child_text, child_words = child.get_text_and_words(config)
145 145   child_prefix = add_id_to_html_tag(config.list_element_prefix, child.id, config)

textractor/parsers/response_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1371,7 +1371,7 @@ def parse_document_api_response(response: dict) -> Document:
1371 1371   and kv.id not in kv_added
1372 1372   ):
1373 1373   # Ignore if the KV is already overlapping with a table
1374      - if any([w.cell_id for w in kv.words]):
     1374 + if any([w.cell_id for w in kv.words]) or layout.layout_type == LAYOUT_LIST:
1375 1375   kv_added.add(kv.id)
1376 1376   continue
1377 1377   # Removing the duplicate words

0 commit comments

Comments
 (0)