Code for layout parser #34

Yashsethi24 · 2021-02-23T15:36:16Z

Hey! Excellent work.
But can you please share the code which you used to get the annotations of the images?

vladamisici · 2025-05-23T09:40:20Z

> Hey! Excellent work. But can you please share the code which you used to get the annotations of the images?

Hi. Here's a snippet for that:

#!/usr/bin/env python3
"""
Usage:
    python publaynet_annotations_extractor.py --input path/to/train.json [--output annotations.json] [--limit 100]

"""
import json
import argparse
from pathlib import Path

from pycocotools.coco import COCO

# PubLayNet is distributed in COCO format, but its category ids refer to
# document-layout labels. The ids are 1-based and follow the tuple order below.
CATEGORY_NAMES = dict(
    enumerate(("text", "title", "list", "table", "figure"), start=1)
)

def parse_args(argv=None):
    """
    Parse the command-line options of the annotation extractor.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is the
            behavior of the original zero-argument call.

    Returns:
        An ``argparse.Namespace`` with ``input`` (str), ``output`` (str) and
        ``limit`` (int, or ``None`` when the option is not given).
    """
    def non_negative_int(text):
        # A negative N would later be used as a slice bound and drop images
        # from the end instead of keeping "the first N", so reject it here.
        value = int(text)
        if value < 0:
            raise argparse.ArgumentTypeError(
                f"must be a non-negative integer, got {text!r}"
            )
        return value

    parser = argparse.ArgumentParser(
        description="Load PubLayNet annotations and extract per-image annotation lists."
    )
    parser.add_argument(
        "-i", "--input", required=True,
        help="Path to the COCO-format PubLayNet JSON (train.json, val.json, or test.json)"
    )
    parser.add_argument(
        "-o", "--output", default="publaynet_image_annotations.json",
        help="Where to save the output JSON mapping"
    )
    parser.add_argument(
        "-l", "--limit", type=non_negative_int,
        help="Process only the first N images (for quick testing)"
    )
    return parser.parse_args(argv)

def load_coco_annotations(json_path: Path) -> COCO:
    """
    Build a COCO object from the annotation file at ``json_path``.

    Raises:
        FileNotFoundError: If ``json_path`` does not exist.
    """
    if json_path.exists():
        # COCO is handed the path as a plain string.
        return COCO(str(json_path))
    raise FileNotFoundError(f"Annotation file not found: {json_path}")

def gather_image_annotations(coco: COCO, limit: int = None) -> dict:
    """
    Return a dict mapping image filenames to their list of annotations.

    Args:
        coco: Loaded COCO object to read image records and annotations from.
        limit: If given, only the first ``limit`` ids returned by
            ``coco.getImgIds()`` are processed. ``None`` (the default) means
            every image; ``0`` yields an empty mapping.

    Returns:
        A dict keyed by each image's ``file_name`` (or ``"<id_N>"`` when the
        image record has no ``file_name``). Each value is a list of dicts with:
          - bbox: the annotation's ``bbox`` value, passed through unchanged
          - category: human-readable name, ``"unknown"`` for unmapped ids
          - area: the annotation's ``area``, 0 when absent
          - iscrowd: the annotation's ``iscrowd``, 0 when absent

    Raises:
        ValueError: If ``limit`` is negative.
    """
    image_ids = coco.getImgIds()
    # Compare against None explicitly: a truthiness test would treat limit=0
    # as "no limit" and process every image.
    if limit is not None:
        if limit < 0:
            # A negative slice bound would silently drop images from the end.
            raise ValueError(f"limit must be non-negative, got {limit}")
        image_ids = image_ids[:limit]

    records = {}
    for img_id in image_ids:
        info = coco.loadImgs(img_id)[0]
        filename = info.get("file_name", f"<id_{img_id}>")

        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        records[filename] = [
            {
                "bbox": ann["bbox"],
                "category": CATEGORY_NAMES.get(ann["category_id"], "unknown"),
                "area": ann.get("area", 0),
                "iscrowd": ann.get("iscrowd", 0),
            }
            for ann in anns
        ]

    return records

def save_annotations(data: dict, out_path: Path) -> None:
    """
    Write the collected annotation data to a JSON file.

    Args:
        data: Mapping to serialize; written with a 2-space indent.
        out_path: Destination file. Missing parent directories are created so
            a long extraction run is not lost to a nonexistent output folder.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"Saved annotations for {len(data)} images to {out_path}")

def main():
    """
    Command-line entry point: parse options, load the COCO annotation file,
    collect the per-image annotations and write them out as JSON.
    """
    cli = parse_args()
    source = Path(cli.input)

    print(f"Loading annotations from {source}...")
    dataset = load_coco_annotations(source)

    print("Gathering per-image annotation data...")
    per_image = gather_image_annotations(dataset, limit=cli.limit)

    save_annotations(per_image, Path(cli.output))

# Run the extractor only when this file is executed as a script, so importing
# the module does not trigger argument parsing or any file I/O.
if __name__ == "__main__":
    main()

Create LICENSE.pretrained.model.md

5b8e10f

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Code for layout parser #34

Code for layout parser #34

Uh oh!

Yashsethi24 commented Feb 23, 2021

Uh oh!

vladamisici commented May 23, 2025 •

edited

Loading

Uh oh!

Uh oh!

Code for layout parser #34

Are you sure you want to change the base?

Code for layout parser #34

Uh oh!

Conversation

Yashsethi24 commented Feb 23, 2021

Uh oh!

vladamisici commented May 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

vladamisici commented May 23, 2025 •

edited

Loading