docling-project
diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py
Lines changed: 45 additions & 8 deletions
diff --git a/docs/examples/pictures_description.ipynb b/docs/examples/pictures_description.ipynb
Lines changed: 343 additions & 0 deletions
diff --git a/docs/examples/pictures_description.py b/docs/examples/pictures_description.py
Lines changed: 0 additions & 48 deletions
diff --git a/mkdocs.yml b/mkdocs.yml
Lines changed: 1 addition & 1 deletion
@@ -6,10 +6,12 @@
 from docling_core.types.doc import (
     BoundingBox,
     CoordOrigin,
+    DocItem,
     DocItemLabel,
     DoclingDocument,
     DocumentOrigin,
     GroupLabel,
+    NodeItem,
     ProvenanceItem,
     RefItem,
     TableData,
@@ -24,6 +26,7 @@
 from pydantic import BaseModel, ConfigDict
 
 from docling.datamodel.base_models import (
+    BasePageElement,
     Cluster,
     ContainerElement,
     FigureElement,
@@ -80,6 +83,35 @@ def _assembled_to_readingorder_elements(
 
         return elements
 
+    def _add_child_elements(
+        self, element: BasePageElement, doc_item: NodeItem, doc: DoclingDocument
+    ):
+        """Add the child clusters of a page element to the document.
+
+        Each cluster in ``element.cluster.children`` becomes one document item
+        parented to ``doc_item``: a list item for ``LIST_ITEM`` children, a
+        heading for ``SECTION_HEADER`` children, and a text item carrying the
+        child's own label otherwise.
+
+        Args:
+            element: Page element whose ``cluster.children`` are converted; its
+                ``page_no`` selects the page in ``doc.pages`` and is recorded
+                in each item's provenance.
+            doc_item: Node the newly created items are attached to as parent.
+            doc: Document that receives the items; it is modified in place.
+
+        Returns:
+            Nothing; the method only adds items to ``doc``.
+
+        NOTE(review): ``element.page_no`` is used unchanged both as the key
+        into ``doc.pages`` and as ``ProvenanceItem.page_no``. Confirm that it
+        uses the same page-numbering base as ``doc.pages``.
+        """
+
+        child: Cluster
+        for child in element.cluster.children:
+            c_label = child.label
+            # Re-express the child's box relative to a bottom-left origin,
+            # which needs the height of the page the element sits on.
+            c_bbox = child.bbox.to_bottom_left_origin(
+                doc.pages[element.page_no].size.height
+            )
+            # Join the stripped text of all non-blank cells with single spaces.
+            # "\x02" is rewritten to "-"; presumably it is a hyphen placeholder
+            # left by text extraction -- TODO confirm.
+            c_text = " ".join(
+                [
+                    cell.text.replace("\x02", "-").strip()
+                    for cell in child.cells
+                    if len(cell.text.strip()) > 0
+                ]
+            )
+
+            # The provenance charspan covers the whole joined text.
+            c_prov = ProvenanceItem(
+                page_no=element.page_no, charspan=(0, len(c_text)), bbox=c_bbox
+            )
+            # Pick the document API call that matches the child's label.
+            if c_label == DocItemLabel.LIST_ITEM:
+                # TODO: Infer if this is a numbered or a bullet list item
+                doc.add_list_item(parent=doc_item, text=c_text, prov=c_prov)
+            elif c_label == DocItemLabel.SECTION_HEADER:
+                doc.add_heading(parent=doc_item, text=c_text, prov=c_prov)
+            else:
+                # Any other label is kept as-is on a generic text item.
+                doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
+
     def _readingorder_elements_to_docling_doc(
         self,
         conv_res: ConversionResult,
@@ -123,8 +155,6 @@ def _readingorder_elements_to_docling_doc(
             for cid in lst
         }
 
-        # TODO: handle merges
-
         for rel in ro_elements:
             if rel.cid in skippable_cids:
                 continue
@@ -181,7 +211,7 @@ def _readingorder_elements_to_docling_doc(
 
                         tbl.footnotes.append(new_footnote_item.get_ref())
 
-                # TODO: handle element.cluster.children.
+                # TODO: Consider adding children of Table.
 
             elif isinstance(element, FigureElement):
                 cap_text = ""
@@ -210,12 +240,19 @@ def _readingorder_elements_to_docling_doc(
 
                         pic.footnotes.append(new_footnote_item.get_ref())
 
-                # TODO: handle element.cluster.children.
-                # _add_child_elements(pic, doc, obj, pelem)
+                self._add_child_elements(element, pic, out_doc)
 
             elif isinstance(element, ContainerElement):  # Form, KV region
-                pass
-                # TODO: handle element.cluster.children.
+                label = element.label
+                group_label = GroupLabel.UNSPECIFIED
+                if label == DocItemLabel.FORM:
+                    group_label = GroupLabel.FORM_AREA
+                elif label == DocItemLabel.KEY_VALUE_REGION:
+                    group_label = GroupLabel.KEY_VALUE_AREA
+
+                container_el = out_doc.add_group(label=group_label)
+
+                self._add_child_elements(element, container_el, out_doc)
 
         return out_doc
 
@@ -284,7 +321,7 @@ def _merge_elements(self, element, merged_elem, new_item, page_height):
             bbox=element.cluster.bbox.to_bottom_left_origin(page_height),
         )
         new_item.text += f" {merged_elem.text}"
-        new_item.orig += f" {merged_elem.text}"  # TODO: This is incomplete.
+        new_item.orig += f" {merged_elem.text}"  # TODO: This is incomplete, we don't have the `orig` field of the merged element.
         new_item.prov.append(prov)
 
     def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
 
@@ -75,7 +75,7 @@ nav:
       - "Figure enrichment": examples/develop_picture_enrichment.py
       - "Table export": examples/export_tables.py
       - "Multimodal export": examples/export_multimodal.py
-      - "Annotate picture with local vlm": examples/pictures_description.py
+      - "Annotate picture with local vlm": examples/pictures_description.ipynb
       - "Annotate picture with remote vlm": examples/pictures_description_api.py
       - "Force full page OCR": examples/full_page_ocr.py
       - "Automatic OCR language detection with tesseract": examples/tesseract_lang_detection.py