Skip to content

Commit 4e68da9

Browse files
committed
Add children to output after reading-order
Signed-off-by: Christoph Auer <[email protected]>
2 parents d788bf2 + 27c0400 commit 4e68da9

File tree

5 files changed

+612
-292
lines changed

5 files changed

+612
-292
lines changed

docling/models/readingorder_model.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
from docling_core.types.doc import (
77
BoundingBox,
88
CoordOrigin,
9+
DocItem,
910
DocItemLabel,
1011
DoclingDocument,
1112
DocumentOrigin,
1213
GroupLabel,
14+
NodeItem,
1315
ProvenanceItem,
1416
RefItem,
1517
TableData,
@@ -24,6 +26,7 @@
2426
from pydantic import BaseModel, ConfigDict
2527

2628
from docling.datamodel.base_models import (
29+
BasePageElement,
2730
Cluster,
2831
ContainerElement,
2932
FigureElement,
@@ -80,6 +83,35 @@ def _assembled_to_readingorder_elements(
8083

8184
return elements
8285

86+
def _add_child_elements(
    self, element: BasePageElement, doc_item: NodeItem, doc: DoclingDocument
):
    """Add the child clusters of ``element`` to ``doc`` as children of ``doc_item``.

    For each cluster in ``element.cluster.children`` the text of its
    non-blank cells is joined with single spaces, a provenance record is
    built from the cluster's bounding box, and one item is added to ``doc``:
    a list item for ``LIST_ITEM`` clusters, a heading for ``SECTION_HEADER``
    clusters, and a text item carrying the cluster's own label otherwise.

    Args:
        element: Page element whose cluster children are emitted.
        doc_item: Node passed as ``parent`` for every item that is added.
        doc: Document that receives the new items; it is modified in place.
    """
    nested: Cluster
    for nested in element.cluster.children:
        kind = nested.label

        # NOTE(review): element.page_no is used unmodified both as the key
        # into doc.pages and as the provenance page number — confirm that
        # the two use the same indexing base.
        to_bottom_left = nested.bbox.to_bottom_left_origin
        page_no = element.page_no
        bbox = to_bottom_left(doc.pages[page_no].size.height)

        # Keep only cells that contain something besides whitespace; each
        # "\x02" in a kept cell is replaced by "-" before trimming.
        fragments = []
        for cell in nested.cells:
            if not cell.text.strip():
                continue
            fragments.append(cell.text.replace("\x02", "-").strip())
        text = " ".join(fragments)

        # The character span covers the whole joined text.
        prov = ProvenanceItem(page_no=page_no, charspan=(0, len(text)), bbox=bbox)

        if kind == DocItemLabel.LIST_ITEM:
            # TODO: Infer if this is a numbered or a bullet list item
            doc.add_list_item(parent=doc_item, text=text, prov=prov)
            continue

        if kind == DocItemLabel.SECTION_HEADER:
            doc.add_heading(parent=doc_item, text=text, prov=prov)
            continue

        doc.add_text(parent=doc_item, label=kind, text=text, prov=prov)
114+
83115
def _readingorder_elements_to_docling_doc(
84116
self,
85117
conv_res: ConversionResult,
@@ -123,8 +155,6 @@ def _readingorder_elements_to_docling_doc(
123155
for cid in lst
124156
}
125157

126-
# TODO: handle merges
127-
128158
for rel in ro_elements:
129159
if rel.cid in skippable_cids:
130160
continue
@@ -181,7 +211,7 @@ def _readingorder_elements_to_docling_doc(
181211

182212
tbl.footnotes.append(new_footnote_item.get_ref())
183213

184-
# TODO: handle element.cluster.children.
214+
# TODO: Consider adding children of Table.
185215

186216
elif isinstance(element, FigureElement):
187217
cap_text = ""
@@ -210,12 +240,19 @@ def _readingorder_elements_to_docling_doc(
210240

211241
pic.footnotes.append(new_footnote_item.get_ref())
212242

213-
# TODO: handle element.cluster.children.
214-
# _add_child_elements(pic, doc, obj, pelem)
243+
self._add_child_elements(element, pic, out_doc)
215244

216245
elif isinstance(element, ContainerElement): # Form, KV region
217-
pass
218-
# TODO: handle element.cluster.children.
246+
label = element.label
247+
group_label = GroupLabel.UNSPECIFIED
248+
if label == DocItemLabel.FORM:
249+
group_label = GroupLabel.FORM_AREA
250+
elif label == DocItemLabel.KEY_VALUE_REGION:
251+
group_label = GroupLabel.KEY_VALUE_AREA
252+
253+
container_el = out_doc.add_group(label=group_label)
254+
255+
self._add_child_elements(element, container_el, out_doc)
219256

220257
return out_doc
221258

@@ -284,7 +321,7 @@ def _merge_elements(self, element, merged_elem, new_item, page_height):
284321
bbox=element.cluster.bbox.to_bottom_left_origin(page_height),
285322
)
286323
new_item.text += f" {merged_elem.text}"
287-
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete.
324+
new_item.orig += f" {merged_elem.text}" # TODO: This is incomplete, we don't have the `orig` field of the merged element.
288325
new_item.prov.append(prov)
289326

290327
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:

docs/examples/pictures_description.ipynb

Lines changed: 343 additions & 0 deletions
Large diffs are not rendered by default.

docs/examples/pictures_description.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ nav:
7575
- "Figure enrichment": examples/develop_picture_enrichment.py
7676
- "Table export": examples/export_tables.py
7777
- "Multimodal export": examples/export_multimodal.py
78-
- "Annotate picture with local vlm": examples/pictures_description.py
78+
- "Annotate picture with local vlm": examples/pictures_description.ipynb
7979
- "Annotate picture with remote vlm": examples/pictures_description_api.py
8080
- "Force full page OCR": examples/full_page_ocr.py
8181
- "Automatic OCR language detection with tesseract": examples/tesseract_lang_detection.py

0 commit comments

Comments
 (0)