Skip to content

Commit f96eb41

Browse files
committed
Fix table duplication in container layout elements
1 parent 976e2cb commit f96eb41

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

textractor/entities/layout.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,7 @@ def get_text_and_words(
242242
)
243243
elif self.layout_type == LAYOUT_KEY_VALUE:
244244
final_text = (
245-
config.table_layout_prefix + final_text + config.table_layout_suffix
245+
config.key_value_layout_prefix + final_text + config.key_value_layout_suffix
246246
)
247247
if config.add_prefixes_and_suffixes_as_words:
248248
if self.layout_type == LAYOUT_TABLE:

textractor/parsers/response_parser.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1367,10 +1367,13 @@ def parse_document_api_response(response: dict) -> Document:
13671367

13681368
# Final clean up of the layout objects
13691369
word_set = set()
1370-
for layout in sorted(page.leaf_layouts, key=lambda l: l.reading_order):
1370+
for layout in sorted(page.layouts, key=lambda l: l.reading_order):
13711371
layout.visit(word_set)
13721372
if not layout.children:
1373-
page.leaf_layouts.remove(layout)
1373+
try:
1374+
page.leaf_layouts.remove(layout)
1375+
except:
1376+
page.container_layouts.remove(layout)
13741377

13751378
document.pages = sorted(list(pages.values()), key=lambda x: x.page_num)
13761379
document.response = response

0 commit comments

Comments
 (0)