Skip to content

Commit

Permalink
Add children to output after reading-order
Browse files Browse the repository at this point in the history
Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git committed Feb 19, 2025
2 parents d788bf2 + 27c0400 commit 4e68da9
Show file tree
Hide file tree
Showing 5 changed files with 612 additions and 292 deletions.
53 changes: 45 additions & 8 deletions docling/models/readingorder_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from docling_core.types.doc import (
BoundingBox,
CoordOrigin,
DocItem,
DocItemLabel,
DoclingDocument,
DocumentOrigin,
GroupLabel,
NodeItem,
ProvenanceItem,
RefItem,
TableData,
Expand All @@ -24,6 +26,7 @@
from pydantic import BaseModel, ConfigDict

from docling.datamodel.base_models import (
BasePageElement,
Cluster,
ContainerElement,
FigureElement,
Expand Down Expand Up @@ -80,6 +83,35 @@ def _assembled_to_readingorder_elements(

return elements

def _add_child_elements(
    self, element: BasePageElement, doc_item: NodeItem, doc: DoclingDocument
):
    """Add the child clusters of ``element`` to ``doc`` as children of ``doc_item``.

    Each cluster in ``element.cluster.children`` becomes one document item:
    a list item for ``DocItemLabel.LIST_ITEM``, a heading for
    ``DocItemLabel.SECTION_HEADER``, and otherwise a text item that keeps
    the child's own label. The item text is the space-joined text of the
    child's non-blank cells, and its provenance points at the page of
    ``element`` with the child's bounding box in bottom-left-origin
    coordinates.
    """
    for sub_cluster in element.cluster.children:
        kind = sub_cluster.label

        # The page height is needed to flip the box to a bottom-left origin.
        bottom_left_box = sub_cluster.bbox.to_bottom_left_origin(
            doc.pages[element.page_no].size.height
        )

        # Cells that are blank after stripping are dropped; a "\x02"
        # character inside a cell is rendered as "-".
        fragments = (
            cell.text.replace("\x02", "-").strip()
            for cell in sub_cluster.cells
            if cell.text.strip()
        )
        joined_text = " ".join(fragments)

        provenance = ProvenanceItem(
            page_no=element.page_no,
            charspan=(0, len(joined_text)),
            bbox=bottom_left_box,
        )

        if kind == DocItemLabel.LIST_ITEM:
            # TODO: Infer if this is a numbered or a bullet list item
            doc.add_list_item(parent=doc_item, text=joined_text, prov=provenance)
            continue

        if kind == DocItemLabel.SECTION_HEADER:
            doc.add_heading(parent=doc_item, text=joined_text, prov=provenance)
            continue

        # Every other label is emitted as a plain text item with that label.
        doc.add_text(
            parent=doc_item, label=kind, text=joined_text, prov=provenance
        )

def _readingorder_elements_to_docling_doc(
self,
conv_res: ConversionResult,
Expand Down Expand Up @@ -123,8 +155,6 @@ def _readingorder_elements_to_docling_doc(
for cid in lst
}

# TODO: handle merges

for rel in ro_elements:
if rel.cid in skippable_cids:
continue
Expand Down Expand Up @@ -181,7 +211,7 @@ def _readingorder_elements_to_docling_doc(

tbl.footnotes.append(new_footnote_item.get_ref())

# TODO: handle element.cluster.children.
# TODO: Consider adding children of Table.

elif isinstance(element, FigureElement):
cap_text = ""
Expand Down Expand Up @@ -210,12 +240,19 @@ def _readingorder_elements_to_docling_doc(

pic.footnotes.append(new_footnote_item.get_ref())

# TODO: handle element.cluster.children.
# _add_child_elements(pic, doc, obj, pelem)
self._add_child_elements(element, pic, out_doc)

elif isinstance(element, ContainerElement): # Form, KV region
pass
# TODO: handle element.cluster.children.
label = element.label
group_label = GroupLabel.UNSPECIFIED
if label == DocItemLabel.FORM:
group_label = GroupLabel.FORM_AREA
elif label == DocItemLabel.KEY_VALUE_REGION:
group_label = GroupLabel.KEY_VALUE_AREA

container_el = out_doc.add_group(label=group_label)

self._add_child_elements(element, container_el, out_doc)

return out_doc

Expand Down Expand Up @@ -284,7 +321,7 @@ def _merge_elements(self, element, merged_elem, new_item, page_height):
bbox=element.cluster.bbox.to_bottom_left_origin(page_height),
)
new_item.text += f" {merged_elem.text}"
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete.
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
new_item.prov.append(prov)

def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
Expand Down
343 changes: 343 additions & 0 deletions docs/examples/pictures_description.ipynb

Large diffs are not rendered by default.

48 changes: 0 additions & 48 deletions docs/examples/pictures_description.py

This file was deleted.

2 changes: 1 addition & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ nav:
- "Figure enrichment": examples/develop_picture_enrichment.py
- "Table export": examples/export_tables.py
- "Multimodal export": examples/export_multimodal.py
- "Annotate picture with local vlm": examples/pictures_description.py
- "Annotate picture with local vlm": examples/pictures_description.ipynb
- "Annotate picture with remote vlm": examples/pictures_description_api.py
- "Force full page OCR": examples/full_page_ocr.py
- "Automatic OCR language detection with tesseract": examples/tesseract_lang_detection.py
Expand Down
Loading

0 comments on commit 4e68da9

Please sign in to comment.