11import json
22import os
33import re
4+ from collections import defaultdict
5+
6+ from tqdm import tqdm
47
58from .image_utils import load_labelmap
69
@@ -96,29 +99,46 @@ def _map_annotations_to_images_1to1(images, annotations):
9699 return countmapped > 0
97100
98101
def _map_annotations_to_images_1tomany(images, annotationFiles):
    """Attach per-image annotation data taken from an annotation file in the image's directory.

    Each image is matched to the annotation files that share its ``dirname``.
    When at least one exists, the first is used and the result of
    ``_filterIndividualAnnotations`` is stored under ``image["annotationfile"]``.
    Images whose directory has no annotation file are left untouched.

    Args:
        images: iterable of image dicts; each must carry a ``"dirname"`` key.
        annotationFiles: list of annotation-file dicts; each must carry
            ``"dirname"`` and ``"parsedType"`` (plus whatever the map builder reads).

    Returns:
        None. ``images`` entries are mutated in place.
    """
    files_by_dir = _list_map(annotationFiles, "dirname")
    # Lookup tables are built once up front so the per-image work does not
    # have to rescan every annotation file.
    imgRefMap, annotationMap = _build_image_and_annotation_maps(annotationFiles)

    for image in tqdm(images):
        dirname = image["dirname"]
        candidates = files_by_dir.get(dirname, [])
        if not candidates:
            continue
        if len(candidates) > 1:
            print(f"warning: found multiple annotation files on dir {dirname}")
        # Only the first annotation file of the directory is considered.
        chosen_file = candidates[0]
        annotation_format = chosen_file["parsedType"]
        image["annotationfile"] = _filterIndividualAnnotations(
            image, chosen_file, annotation_format, imgRefMap, annotationMap
        )
117+
118+
119+ def _build_image_and_annotation_maps (annotationFiles ):
120+ imgRefMap = {}
121+ annotationMap = defaultdict (list )
122+ for annFile in annotationFiles :
123+ filename , dirname , parsed , parsedType = (
124+ annFile ["file" ],
125+ annFile ["dirname" ],
126+ annFile ["parsed" ],
127+ annFile ["parsedType" ],
128+ )
129+ if parsedType == "coco" :
130+ for imageRef in parsed ["images" ]:
131+ imgRefMap [f"{ filename } /{ imageRef ['file_name' ]} " ] = imageRef
132+ for annotation in parsed ["annotations" ]:
133+ annotationMap [f"{ dirname } /{ annotation ['image_id' ]} " ].append (annotation )
134+ return imgRefMap , annotationMap
113135
114136
115- def _filterIndividualAnnotations (image , annotation , format ):
137+ def _filterIndividualAnnotations (image , annotation , format , imgRefMap , annotationMap ):
116138 parsed = annotation ["parsed" ]
117139 if format == "coco" :
118- imgReferences = [i for i in parsed ["images" ] if i ["file_name" ] == image ["name" ]]
119- if len (imgReferences ) > 1 :
120- print (f"warning: found multiple image entries for image { image ['file' ]} in { annotation ['file' ]} " )
121- if imgReferences :
140+ imgReference = imgRefMap .get (f"{ annotation ['file' ]} /{ image ['name' ]} " )
141+ if imgReference :
122142 # workaround to make Annotations.js correctly identify this as coco in the backend
123143 fake_annotation = {
124144 "id" : 999999999 ,
@@ -128,16 +148,15 @@ def _filterIndividualAnnotations(image, annotation, format):
128148 "segmentation" : [],
129149 "iscrowd" : 0 ,
130150 }
131- imgReference = imgReferences [0 ]
132151 _annotation = {"name" : "annotation.coco.json" }
152+ annotations_for_image = annotationMap .get (f"{ image ['dirname' ]} /{ imgReference ['id' ]} " , [])
133153 _annotation ["rawText" ] = json .dumps (
134154 {
135155 "info" : parsed ["info" ],
136156 "licenses" : parsed ["licenses" ],
137157 "categories" : parsed ["categories" ],
138158 "images" : [imgReference ],
139- "annotations" : [a for a in parsed ["annotations" ] if a ["image_id" ] == imgReference ["id" ]]
140- or [fake_annotation ],
159+ "annotations" : annotations_for_image or [fake_annotation ],
141160 }
142161 )
143162 return _annotation
@@ -241,3 +260,10 @@ def _decide_split(images):
241260 i ["split" ] = "test"
242261 else :
243262 i ["split" ] = "train"
263+
264+
265+ def _list_map (my_list , key ):
266+ d = {}
267+ for i in my_list :
268+ d .setdefault (i [key ], []).append (i )
269+ return d
0 commit comments