Skip to content

Code for layout parser #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open

Code for layout parser #34

wants to merge 1 commit into from

Conversation

Yashsethi24
Copy link

Hey! Excellent work.
But can you please share the code which you used to get the annotations of the images?

@vladamisici
Copy link

vladamisici commented May 23, 2025

Hey! Excellent work. But can you please share the code which you used to get the annotations of the images?

Hi. Here's a snippet for that:

#!/usr/bin/env python3
"""
Usage:
    python publaynet_annotations_extractor.py --input path/to/train.json [--output annotations.json] [--limit 100]

"""
import json
import argparse
from pathlib import Path

from pycocotools.coco import COCO

# PubLayNet annotations are COCO-format, but the category ids name
# document-layout regions. Ids are assigned in tuple order, starting at 1.
CATEGORY_NAMES = dict(
    enumerate(("text", "title", "list", "table", "figure"), start=1)
)

def parse_args(argv=None):
    """
    Parse the command-line options for the annotation extractor.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a no-argument call did
            before this parameter existed.

    Returns:
        An ``argparse.Namespace`` with ``input`` (str, required),
        ``output`` (str, defaults to ``publaynet_image_annotations.json``)
        and ``limit`` (int, or ``None`` when the flag is omitted).

    Raises:
        SystemExit: raised by argparse (exit status 2) when ``--input`` is
            missing, ``--limit`` is not an integer, or ``--limit`` is
            negative.
    """
    parser = argparse.ArgumentParser(
        description="Load PubLayNet annotations and extract per-image annotation lists."
    )
    parser.add_argument(
        "-i", "--input", required=True,
        help="Path to the COCO-format PubLayNet JSON (train.json, val.json, or test.json)"
    )
    parser.add_argument(
        "-o", "--output", default="publaynet_image_annotations.json",
        help="Where to save the output JSON mapping"
    )
    parser.add_argument(
        "-l", "--limit", type=int,
        help="Process only the first N images (for quick testing)"
    )
    args = parser.parse_args(argv)

    # A negative N would otherwise reach a list slice and silently drop
    # images from the END of the list rather than keep the first N.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be a non-negative integer")
    return args

def load_coco_annotations(json_path: Path) -> COCO:
    """
    Build a COCO object from the annotation file at ``json_path``.

    Args:
        json_path: Location of the annotation JSON file.

    Returns:
        The ``COCO`` instance constructed from the file's path string.

    Raises:
        FileNotFoundError: if nothing exists at ``json_path``.
    """
    if json_path.exists():
        # COCO is handed a plain string path rather than the Path object.
        return COCO(str(json_path))
    raise FileNotFoundError(f"Annotation file not found: {json_path}")

def gather_image_annotations(coco: COCO, limit: int = None) -> dict:
    """
    Return a dict mapping image filenames to their list of annotations.

    Each annotation contains:
      - bbox: the annotation's "bbox" value, passed through unchanged
      - category: human-readable name from CATEGORY_NAMES, or "unknown"
        when the category id is not in that table
      - area: the annotation's "area", or 0 when absent
      - iscrowd: the annotation's "iscrowd", or 0 when absent

    Args:
        coco: Loaded COCO index to read images and annotations from.
        limit: Process only the first ``limit`` image ids returned by
            ``coco.getImgIds()``. ``None`` (default) processes every image;
            ``0`` yields an empty mapping.

    Returns:
        dict keyed by each image's "file_name" (or ``"<id_N>"`` when the
        image record has no file name). Images that share a key overwrite
        one another; the last one processed wins.

    Raises:
        ValueError: if ``limit`` is negative.
    """
    if limit is not None and limit < 0:
        # A negative slice bound would drop images from the end of the list
        # instead of keeping the first N, so reject it outright.
        raise ValueError(f"limit must be non-negative, got {limit}")

    image_ids = coco.getImgIds()
    # Compare against None explicitly: a truthiness test would treat
    # limit=0 as "no limit" and process the whole dataset.
    if limit is not None:
        image_ids = image_ids[:limit]

    records = {}
    for img_id in image_ids:
        info = coco.loadImgs(img_id)[0]
        filename = info.get("file_name", f"<id_{img_id}>")

        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        records[filename] = [
            {
                "bbox": ann["bbox"],
                "category": CATEGORY_NAMES.get(ann["category_id"], "unknown"),
                "area": ann.get("area", 0),
                "iscrowd": ann.get("iscrowd", 0),
            }
            for ann in anns
        ]

    return records

def save_annotations(data: dict, out_path: Path) -> None:
    """
    Serialize ``data`` to ``out_path`` as JSON and report what was written.

    Args:
        data: Mapping to serialize; its length is reported as the number
            of images saved.
        out_path: Destination file. It is opened for writing as UTF-8, so
            any existing content is replaced.

    The JSON is indented by two spaces, and a one-line summary is printed
    to standard output once the file has been closed.
    """
    with out_path.open("w", encoding="utf-8") as sink:
        json.dump(data, sink, indent=2)

    print(f"Saved annotations for {len(data)} images to {out_path}")

def main():
    """
    Run the extractor end to end from the command line.

    Parses the CLI options, loads the COCO annotation file named by
    ``--input``, collects the per-image annotations (honoring ``--limit``),
    and writes the result to the ``--output`` path. Progress messages are
    printed before the load and gather steps.
    """
    cli = parse_args()
    source = Path(cli.input)
    destination = Path(cli.output)

    print(f"Loading annotations from {source}...")
    coco_index = load_coco_annotations(source)

    print("Gathering per-image annotation data...")
    per_image = gather_image_annotations(coco_index, limit=cli.limit)

    save_annotations(per_image, destination)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants