Skip to content

Commit 2a8917a

Browse files
authored
Merge pull request #246 from roboflow/import-csv-annotations
Add support for global csv annotation files
2 parents 70dc66c + d842a1b commit 2a8917a

File tree

9 files changed

+65
-23
lines changed

9 files changed

+65
-23
lines changed

roboflow/core/project.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -528,9 +528,9 @@ def single_upload(
528528

529529
def _annotation_params(self, annotation_path):
530530
annotation_name, annotation_string = None, None
531-
if isinstance(annotation_path, dict):
531+
if isinstance(annotation_path, dict) and annotation_path.get("rawText"):
532532
annotation_name = annotation_path["name"]
533-
annotation_string = json.dumps(annotation_path["parsed"])
533+
annotation_string = annotation_path["rawText"]
534534
elif os.path.exists(annotation_path):
535535
with open(annotation_path, "r"):
536536
annotation_string = open(annotation_path, "r").read()

roboflow/core/workspace.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,8 @@ def _upload_image(imagedesc):
341341
labelmap = None
342342
annotationdesc = imagedesc.get("annotationfile")
343343
if annotationdesc:
344-
if annotationdesc.get("parsed"):
345-
annotation_path = {"name": annotationdesc["name"], "parsed": annotationdesc["parsed"]}
344+
if annotationdesc.get("rawText"):
345+
annotation_path = annotationdesc
346346
else:
347347
annotation_path = f"{location}{annotationdesc['file']}"
348348
labelmap = annotationdesc.get("labelmap")

roboflow/util/folderparser.py

+41-16
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from .image_utils import load_labelmap
66

77
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"}
8-
ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml"}
8+
ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv"}
99
LABELMAPS_EXTENSIONS = {".labels", ".yaml", ".yml"}
1010

1111

@@ -126,18 +126,17 @@ def _filterIndividualAnnotations(image, annotation, format):
126126
"iscrowd": 0,
127127
}
128128
imgReference = imgReferences[0]
129-
_annotation = {
130-
"name": "annotation.coco.json",
131-
"parsedType": "coco",
132-
"parsed": {
129+
_annotation = {"name": "annotation.coco.json"}
130+
_annotation["rawText"] = json.dumps(
131+
{
133132
"info": parsed["info"],
134133
"licenses": parsed["licenses"],
135134
"categories": parsed["categories"],
136135
"images": [imgReference],
137136
"annotations": [a for a in parsed["annotations"] if a["image_id"] == imgReference["id"]]
138137
or [fake_annotation],
139-
},
140-
}
138+
}
139+
)
141140
return _annotation
142141
elif format == "createml":
143142
imgReferences = [i for i in parsed if i["image"] == image["name"]]
@@ -147,27 +146,53 @@ def _filterIndividualAnnotations(image, annotation, format):
147146
imgReference = imgReferences[0]
148147
_annotation = {
149148
"name": "annotation.createml.json",
150-
"parsedType": "createml",
151-
"parsed": [imgReference],
149+
"rawText": json.dumps([imgReference]),
150+
}
151+
return _annotation
152+
elif format == "csv":
153+
imgLines = [ld["line"] for ld in parsed["lines"] if ld["file_name"] == image["name"]]
154+
if imgLines:
155+
headers = parsed["headers"]
156+
_annotation = {
157+
"name": "annotation.csv",
158+
"rawText": "".join([headers] + imgLines),
152159
}
153160
return _annotation
161+
else:
162+
return None
154163
return None
155164

156165

157166
def _loadAnnotations(folder, annotations):
158-
valid_extensions = {".json"}
167+
valid_extensions = {".json", ".csv"}
159168
annotations = [a for a in annotations if a["extension"] in valid_extensions]
160169
for ann in annotations:
161170
extension = ann["extension"]
162-
with open(f"{folder}{ann['file']}", "r") as f:
163-
parsed = json.load(f)
164-
parsedType = _guessAnnotationFileFormat(parsed, extension)
165-
if parsedType:
166-
ann["parsed"] = parsed
167-
ann["parsedType"] = parsedType
171+
if extension == ".json":
172+
with open(f"{folder}{ann['file']}", "r") as f:
173+
parsed = json.load(f)
174+
parsedType = _guessAnnotationFileFormat(parsed, extension)
175+
if parsedType:
176+
ann["parsed"] = parsed
177+
ann["parsedType"] = parsedType
178+
elif extension == ".csv":
179+
ann["parsedType"] = "csv"
180+
ann["parsed"] = _parseAnnotationCSV(f"{folder}{ann['file']}")
168181
return annotations
169182

170183

184+
def _parseAnnotationCSV(filename):
185+
# TODO: use a proper CSV library?
186+
with open(filename, "r") as f:
187+
lines = f.readlines()
188+
headers = lines[0]
189+
lines = [{"file_name": ld.split(",")[0].strip(), "line": ld} for ld in lines[1:]]
190+
return {
191+
"headers": headers,
192+
"lines": lines,
193+
}
194+
195+
171196
def _guessAnnotationFileFormat(parsed, extension):
172197
if extension == ".json":
173198
if isinstance(parsed, dict):
+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
img_fName,img_w,img_h,class_label,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr
2+
train_10307.jpeg,3024,4032,culex,1459,1389,1826,2062
3+
train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815
4+
train_10309.jpeg,1024,1365,culex,304,438,614,785
5+
train_10310.jpeg,2976,3968,albopictus,1900,1280,2163,1653
3.38 MB
Loading
199 KB
Loading
37.2 KB
Loading

tests/manual/debugme.py

+1
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,6 @@
4040
# f"import {thisdir}/data/cultura-pepino-yolov8 -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs
4141
# f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs
4242
f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split() # noqa: E501 // docs
43+
# f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split() # noqa: E501 // docs
4344
)
4445
args.func(args)

tests/util/test_folderparser.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@ def test_parse_sharks_coco(self):
1818
parsed = folderparser.parsefolder(sharksfolder)
1919
testImagePath = "/train/sharks_mp4-20_jpg.rf.90ba2e8e9ca0613f71359efb7ed48b26.jpg"
2020
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
21-
assert len(testImage["annotationfile"]["parsed"]["annotations"]) == 5
21+
assert len(json.loads(testImage["annotationfile"]["rawText"])["annotations"]) == 5
2222

2323
def test_parse_sharks_createml(self):
2424
sharksfolder = f"{thisdir}/../datasets/sharks-tiny-createml"
2525
parsed = folderparser.parsefolder(sharksfolder)
2626
testImagePath = "/train/sharks_mp4-20_jpg.rf.5359121123e86e016401ea2731e47949.jpg"
2727
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
28-
assert len(testImage["annotationfile"]["parsed"]) == 1
29-
imgReference = testImage["annotationfile"]["parsed"][0]
28+
imgParsedAnnotations = json.loads(testImage["annotationfile"]["rawText"])
29+
assert len(imgParsedAnnotations) == 1
30+
imgReference = imgParsedAnnotations[0]
3031
assert len(imgReference["annotations"]) == 5
3132

3233
def test_parse_sharks_yolov9(self):
@@ -38,6 +39,16 @@ def test_parse_sharks_yolov9(self):
3839
assert testImage["annotationfile"]["file"] == expectAnnotationFile
3940
assert testImage["annotationfile"]["labelmap"] == {0: "fish", 1: "primary", 2: "shark"}
4041

42+
def test_parse_mosquitos_csv(self):
    # A global CSV annotation file should be reduced, per image, to the
    # header line followed by only the rows whose first column names that image.
    mosquitos_folder = f"{thisdir}/../datasets/mosquitos"
    images = folderparser.parsefolder(mosquitos_folder)["images"]
    matches = [img for img in images if img["file"] == "/train_10308.jpeg"]
    target = matches[0]
    assert target["annotationfile"]["name"] == "annotation.csv"
    expected_lines = [
        "img_fName,img_w,img_h,class_label,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr\n",
        "train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815\n",
    ]
    assert target["annotationfile"]["rawText"] == "".join(expected_lines)
51+
4152

4253
def _assertJsonMatchesFile(actual, filename):
4354
with open(filename, "r") as file:

0 commit comments

Comments
 (0)