Merge pull request #246 from roboflow/import-csv-annotations

tonylampada · web-flow · commit 2a8917a70a42 · 2024-04-26T12:08:33.000-03:00
Add support for global csv annotation files
diff --git a/roboflow/core/project.py b/roboflow/core/project.py
@@ -528,9 +528,9 @@ def single_upload(
 
     def _annotation_params(self, annotation_path):
         annotation_name, annotation_string = None, None
-        if isinstance(annotation_path, dict):
+        if isinstance(annotation_path, dict) and annotation_path.get("rawText"):
             annotation_name = annotation_path["name"]
-            annotation_string = json.dumps(annotation_path["parsed"])
+            annotation_string = annotation_path["rawText"]
         elif os.path.exists(annotation_path):
             with open(annotation_path, "r"):
                 annotation_string = open(annotation_path, "r").read()
diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
@@ -341,8 +341,8 @@ def _upload_image(imagedesc):
             labelmap = None
             annotationdesc = imagedesc.get("annotationfile")
             if annotationdesc:
-                if annotationdesc.get("parsed"):
-                    annotation_path = {"name": annotationdesc["name"], "parsed": annotationdesc["parsed"]}
+                if annotationdesc.get("rawText"):
+                    annotation_path = annotationdesc
                 else:
                     annotation_path = f"{location}{annotationdesc['file']}"
                 labelmap = annotationdesc.get("labelmap")
diff --git a/roboflow/util/folderparser.py b/roboflow/util/folderparser.py
@@ -5,7 +5,7 @@
 from .image_utils import load_labelmap
 
 IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"}
-ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml"}
+ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv"}
 LABELMAPS_EXTENSIONS = {".labels", ".yaml", ".yml"}
 
 
@@ -126,18 +126,17 @@ def _filterIndividualAnnotations(image, annotation, format):
                 "iscrowd": 0,
             }
             imgReference = imgReferences[0]
-            _annotation = {
-                "name": "annotation.coco.json",
-                "parsedType": "coco",
-                "parsed": {
+            _annotation = {"name": "annotation.coco.json"}
+            _annotation["rawText"] = json.dumps(
+                {
                     "info": parsed["info"],
                     "licenses": parsed["licenses"],
                     "categories": parsed["categories"],
                     "images": [imgReference],
                     "annotations": [a for a in parsed["annotations"] if a["image_id"] == imgReference["id"]]
                     or [fake_annotation],
-                },
-            }
+                }
+            )
             return _annotation
     elif format == "createml":
         imgReferences = [i for i in parsed if i["image"] == image["name"]]
@@ -147,27 +146,53 @@ def _filterIndividualAnnotations(image, annotation, format):
             imgReference = imgReferences[0]
             _annotation = {
                 "name": "annotation.createml.json",
-                "parsedType": "createml",
-                "parsed": [imgReference],
+                "rawText": json.dumps([imgReference]),
+            }
+            return _annotation
+    elif format == "csv":
+        imgLines = [ld["line"] for ld in parsed["lines"] if ld["file_name"] == image["name"]]
+        if imgLines:
+            headers = parsed["headers"]
+            _annotation = {
+                "name": "annotation.csv",
+                "rawText": "".join([headers] + imgLines),
             }
             return _annotation
+        else:
+            return None
     return None
 
 
 def _loadAnnotations(folder, annotations):
-    valid_extensions = {".json"}
+    valid_extensions = {".json", ".csv"}
     annotations = [a for a in annotations if a["extension"] in valid_extensions]
     for ann in annotations:
         extension = ann["extension"]
-        with open(f"{folder}{ann['file']}", "r") as f:
-            parsed = json.load(f)
-            parsedType = _guessAnnotationFileFormat(parsed, extension)
-            if parsedType:
-                ann["parsed"] = parsed
-                ann["parsedType"] = parsedType
+        if extension == ".json":
+            with open(f"{folder}{ann['file']}", "r") as f:
+                parsed = json.load(f)
+                parsedType = _guessAnnotationFileFormat(parsed, extension)
+                if parsedType:
+                    ann["parsed"] = parsed
+                    ann["parsedType"] = parsedType
+        elif extension == ".csv":
+            ann["parsedType"] = "csv"
+            ann["parsed"] = _parseAnnotationCSV(f"{folder}{ann['file']}")
     return annotations
 
 
+def _parseAnnotationCSV(filename):
+    # Naive comma split, no quoted fields (TODO: use a proper CSV library?); lines keep newlines.
+    with open(filename, "r") as f:
+        lines = f.readlines()
+    headers = lines[0] if lines else ""  # empty file: no header row, avoid IndexError
+    lines = [{"file_name": ld.split(",")[0].strip(), "line": ld} for ld in lines[1:]]
+    return {
+        "headers": headers,
+        "lines": lines,
+    }
+
+
 def _guessAnnotationFileFormat(parsed, extension):
     if extension == ".json":
         if isinstance(parsed, dict):
diff --git a/tests/datasets/mosquitos/mosquitos.csv b/tests/datasets/mosquitos/mosquitos.csv
@@ -0,0 +1,5 @@
+img_fName,img_w,img_h,class_label,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr
+train_10307.jpeg,3024,4032,culex,1459,1389,1826,2062
+train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815
+train_10309.jpeg,1024,1365,culex,304,438,614,785
+train_10310.jpeg,2976,3968,albopictus,1900,1280,2163,1653
diff --git a/tests/datasets/mosquitos/train_10307.jpeg b/tests/datasets/mosquitos/train_10307.jpeg
diff --git a/tests/datasets/mosquitos/train_10308.jpeg b/tests/datasets/mosquitos/train_10308.jpeg
diff --git a/tests/datasets/mosquitos/train_10309.jpeg b/tests/datasets/mosquitos/train_10309.jpeg
diff --git a/tests/manual/debugme.py b/tests/manual/debugme.py
@@ -40,5 +40,6 @@
         # f"import {thisdir}/data/cultura-pepino-yolov8 -w wolfodorpythontests -p yellow-auto -c 100".split()  # noqa: E501 // docs
         # f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split()  # noqa: E501 // docs
         f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split()  # noqa: E501 // docs
+        # f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split()  # noqa: E501 // docs
     )
     args.func(args)
diff --git a/tests/util/test_folderparser.py b/tests/util/test_folderparser.py
@@ -18,15 +18,16 @@ def test_parse_sharks_coco(self):
         parsed = folderparser.parsefolder(sharksfolder)
         testImagePath = "/train/sharks_mp4-20_jpg.rf.90ba2e8e9ca0613f71359efb7ed48b26.jpg"
         testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
-        assert len(testImage["annotationfile"]["parsed"]["annotations"]) == 5
+        assert len(json.loads(testImage["annotationfile"]["rawText"])["annotations"]) == 5
 
     def test_parse_sharks_createml(self):
         sharksfolder = f"{thisdir}/../datasets/sharks-tiny-createml"
         parsed = folderparser.parsefolder(sharksfolder)
         testImagePath = "/train/sharks_mp4-20_jpg.rf.5359121123e86e016401ea2731e47949.jpg"
         testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
-        assert len(testImage["annotationfile"]["parsed"]) == 1
-        imgReference = testImage["annotationfile"]["parsed"][0]
+        imgParsedAnnotations = json.loads(testImage["annotationfile"]["rawText"])
+        assert len(imgParsedAnnotations) == 1
+        imgReference = imgParsedAnnotations[0]
         assert len(imgReference["annotations"]) == 5
 
     def test_parse_sharks_yolov9(self):
@@ -38,6 +39,16 @@ def test_parse_sharks_yolov9(self):
         assert testImage["annotationfile"]["file"] == expectAnnotationFile
         assert testImage["annotationfile"]["labelmap"] == {0: "fish", 1: "primary", 2: "shark"}
 
+    def test_parse_mosquitos_csv(self):
+        # A global CSV is split per image: the header line plus only that image's rows.
+        parsed = folderparser.parsefolder(f"{thisdir}/../datasets/mosquitos")
+        wantedFile = "/train_10308.jpeg"
+        annotationfile = [i for i in parsed["images"] if i["file"] == wantedFile][0]["annotationfile"]
+        assert annotationfile["name"] == "annotation.csv"
+        headerLine = "img_fName,img_w,img_h,class_label,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr\n"
+        rowLine = "train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815\n"
+        assert annotationfile["rawText"] == headerLine + rowLine
+
 
 def _assertJsonMatchesFile(actual, filename):
     with open(filename, "r") as file:

Original file line number	Diff line number	Diff line change
`@@ -40,5 +40,6 @@`
`40`	`40`	`# f"import {thisdir}/data/cultura-pepino-yolov8 -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs`
`41`	`41`	`# f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs`
`42`	`42`	`f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split() # noqa: E501 // docs`
	`43`	`+ # f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split() # noqa: E501 // docs`
`43`	`44`	`)`
`44`	`45`	`args.func(args)`