Skip to content

Commit 9c716fb

Browse files
authored
Merge pull request #255 from roboflow/bug/hangging-upload
bugfix: CLI hangs with super big dataset
2 parents 244938d + 02252c3 commit 9c716fb

File tree

2 files changed

+44
-18
lines changed

2 files changed

+44
-18
lines changed

roboflow/__init__.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from roboflow.models import CLIPModel, GazeModel # noqa: F401
1515
from roboflow.util.general import write_line
1616

17-
__version__ = "1.1.28"
17+
__version__ = "1.1.29"
1818

1919

2020
def check_key(api_key, model, notebook, num_retries=0):

roboflow/util/folderparser.py

+43 -17
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import json
22
import os
33
import re
4+
from collections import defaultdict
5+
6+
from tqdm import tqdm
47

58
from .image_utils import load_labelmap
69

@@ -96,29 +99,46 @@ def _map_annotations_to_images_1to1(images, annotations):
9699
return countmapped > 0
97100

98101

99-
def _map_annotations_to_images_1tomany(images, annotations):
100-
annotationsByDirname = {}
101-
for ann in annotations:
102-
dirname = ann["dirname"]
103-
annotationsByDirname.setdefault(dirname, []).append(ann)
104-
for image in images:
102+
def _map_annotations_to_images_1tomany(images, annotationFiles):
103+
annotationsByDirname = _list_map(annotationFiles, "dirname")
104+
imgRefMap, annotationMap = _build_image_and_annotation_maps(annotationFiles)
105+
106+
for image in tqdm(images):
105107
dirname = image["dirname"]
106108
annotationsInSameDir = annotationsByDirname.get(dirname, [])
107109
if annotationsInSameDir:
108110
if len(annotationsInSameDir) > 1:
109111
print(f"warning: found multiple annotation files on dir {dirname}")
110-
annotation = annotationsInSameDir[0]
111-
format = annotation["parsedType"]
112-
image["annotationfile"] = _filterIndividualAnnotations(image, annotation, format)
112+
annotationFile = annotationsInSameDir[0]
113+
format = annotationFile["parsedType"]
114+
image["annotationfile"] = _filterIndividualAnnotations(
115+
image, annotationFile, format, imgRefMap, annotationMap
116+
)
117+
118+
119+
def _build_image_and_annotation_maps(annotationFiles):
120+
imgRefMap = {}
121+
annotationMap = defaultdict(list)
122+
for annFile in annotationFiles:
123+
filename, dirname, parsed, parsedType = (
124+
annFile["file"],
125+
annFile["dirname"],
126+
annFile["parsed"],
127+
annFile["parsedType"],
128+
)
129+
if parsedType == "coco":
130+
for imageRef in parsed["images"]:
131+
imgRefMap[f"{filename}/{imageRef['file_name']}"] = imageRef
132+
for annotation in parsed["annotations"]:
133+
annotationMap[f"{dirname}/{annotation['image_id']}"].append(annotation)
134+
return imgRefMap, annotationMap
113135

114136

115-
def _filterIndividualAnnotations(image, annotation, format):
137+
def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotationMap):
116138
parsed = annotation["parsed"]
117139
if format == "coco":
118-
imgReferences = [i for i in parsed["images"] if i["file_name"] == image["name"]]
119-
if len(imgReferences) > 1:
120-
print(f"warning: found multiple image entries for image {image['file']} in {annotation['file']}")
121-
if imgReferences:
140+
imgReference = imgRefMap.get(f"{annotation['file']}/{image['name']}")
141+
if imgReference:
122142
# workaround to make Annotations.js correctly identify this as coco in the backend
123143
fake_annotation = {
124144
"id": 999999999,
@@ -128,16 +148,15 @@ def _filterIndividualAnnotations(image, annotation, format):
128148
"segmentation": [],
129149
"iscrowd": 0,
130150
}
131-
imgReference = imgReferences[0]
132151
_annotation = {"name": "annotation.coco.json"}
152+
annotations_for_image = annotationMap.get(f"{image['dirname']}/{imgReference['id']}", [])
133153
_annotation["rawText"] = json.dumps(
134154
{
135155
"info": parsed["info"],
136156
"licenses": parsed["licenses"],
137157
"categories": parsed["categories"],
138158
"images": [imgReference],
139-
"annotations": [a for a in parsed["annotations"] if a["image_id"] == imgReference["id"]]
140-
or [fake_annotation],
159+
"annotations": annotations_for_image or [fake_annotation],
141160
}
142161
)
143162
return _annotation
@@ -241,3 +260,10 @@ def _decide_split(images):
241260
i["split"] = "test"
242261
else:
243262
i["split"] = "train"
263+
264+
265+
def _list_map(my_list, key):
266+
d = {}
267+
for i in my_list:
268+
d.setdefault(i[key], []).append(i)
269+
return d

0 commit comments

Comments
 (0)