1
1
import json
2
2
import os
3
3
import re
4
+ from collections import defaultdict
5
+
6
+ from tqdm import tqdm
4
7
5
8
from .image_utils import load_labelmap
6
9
@@ -96,29 +99,46 @@ def _map_annotations_to_images_1to1(images, annotations):
96
99
return countmapped > 0
97
100
98
101
99
def _map_annotations_to_images_1tomany(images, annotationFiles):
    """Attach per-image annotation data from one annotation file per directory.

    Annotation files are grouped by their "dirname". For every image whose
    "dirname" has at least one annotation file, the first file of that
    directory is passed to ``_filterIndividualAnnotations`` and the result is
    stored on the image under the "annotationfile" key. Images in a directory
    without annotation files are left untouched.

    Args:
        images: iterable of image dicts; each is read for "dirname" and is
            mutated in place.
        annotationFiles: list of annotation-file dicts; each is read for
            "dirname" and "parsedType" here.
    """
    files_by_dir = _list_map(annotationFiles, "dirname")
    # Lookup tables are built once up front so the per-image step does not
    # have to rescan every annotation file.
    image_refs, annotations_by_image = _build_image_and_annotation_maps(annotationFiles)

    for image in tqdm(images):
        image_dir = image["dirname"]
        candidates = files_by_dir.get(image_dir)
        if not candidates:
            continue
        if len(candidates) > 1:
            print(f"warning: found multiple annotation files on dir {image_dir}")
        # Only the first annotation file of the directory is ever used.
        chosen = candidates[0]
        image["annotationfile"] = _filterIndividualAnnotations(
            image, chosen, chosen["parsedType"], image_refs, annotations_by_image
        )
117
+
118
+
119
+ def _build_image_and_annotation_maps (annotationFiles ):
120
+ imgRefMap = {}
121
+ annotationMap = defaultdict (list )
122
+ for annFile in annotationFiles :
123
+ filename , dirname , parsed , parsedType = (
124
+ annFile ["file" ],
125
+ annFile ["dirname" ],
126
+ annFile ["parsed" ],
127
+ annFile ["parsedType" ],
128
+ )
129
+ if parsedType == "coco" :
130
+ for imageRef in parsed ["images" ]:
131
+ imgRefMap [f"{ filename } /{ imageRef ['file_name' ]} " ] = imageRef
132
+ for annotation in parsed ["annotations" ]:
133
+ annotationMap [f"{ dirname } /{ annotation ['image_id' ]} " ].append (annotation )
134
+ return imgRefMap , annotationMap
113
135
114
136
115
- def _filterIndividualAnnotations (image , annotation , format ):
137
+ def _filterIndividualAnnotations (image , annotation , format , imgRefMap , annotationMap ):
116
138
parsed = annotation ["parsed" ]
117
139
if format == "coco" :
118
- imgReferences = [i for i in parsed ["images" ] if i ["file_name" ] == image ["name" ]]
119
- if len (imgReferences ) > 1 :
120
- print (f"warning: found multiple image entries for image { image ['file' ]} in { annotation ['file' ]} " )
121
- if imgReferences :
140
+ imgReference = imgRefMap .get (f"{ annotation ['file' ]} /{ image ['name' ]} " )
141
+ if imgReference :
122
142
# workaround to make Annotations.js correctly identify this as coco in the backend
123
143
fake_annotation = {
124
144
"id" : 999999999 ,
@@ -128,16 +148,15 @@ def _filterIndividualAnnotations(image, annotation, format):
128
148
"segmentation" : [],
129
149
"iscrowd" : 0 ,
130
150
}
131
- imgReference = imgReferences [0 ]
132
151
_annotation = {"name" : "annotation.coco.json" }
152
+ annotations_for_image = annotationMap .get (f"{ image ['dirname' ]} /{ imgReference ['id' ]} " , [])
133
153
_annotation ["rawText" ] = json .dumps (
134
154
{
135
155
"info" : parsed ["info" ],
136
156
"licenses" : parsed ["licenses" ],
137
157
"categories" : parsed ["categories" ],
138
158
"images" : [imgReference ],
139
- "annotations" : [a for a in parsed ["annotations" ] if a ["image_id" ] == imgReference ["id" ]]
140
- or [fake_annotation ],
159
+ "annotations" : annotations_for_image or [fake_annotation ],
141
160
}
142
161
)
143
162
return _annotation
@@ -241,3 +260,10 @@ def _decide_split(images):
241
260
i ["split" ] = "test"
242
261
else :
243
262
i ["split" ] = "train"
263
+
264
+
265
+ def _list_map (my_list , key ):
266
+ d = {}
267
+ for i in my_list :
268
+ d .setdefault (i [key ], []).append (i )
269
+ return d
0 commit comments