Merge pull request #244 from roboflow/upload-add-retry

tonylampada · web-flow · commit 75abcf78c765 · 2024-03-22T08:17:32.000-03:00
Reliability improvement for CLI upload
diff --git a/roboflow/__init__.py b/roboflow/__init__.py
@@ -14,7 +14,7 @@
 from roboflow.models import CLIPModel, GazeModel  # noqa: F401
 from roboflow.util.general import write_line
 
-__version__ = "1.1.24"
+__version__ = "1.1.25"
 
 
 def check_key(api_key, model, notebook, num_retries=0):
diff --git a/roboflow/core/project.py b/roboflow/core/project.py
@@ -11,7 +11,7 @@
 from roboflow.adapters import rfapi
 from roboflow.config import API_URL, DEMO_KEYS
 from roboflow.core.version import Version
-from roboflow.util.general import retry
+from roboflow.util.general import Retry
 from roboflow.util.image_utils import load_labelmap
 
 ACCEPTED_IMAGE_FORMATS = ["PNG", "JPEG"]
@@ -473,12 +473,12 @@ def single_upload(
             annotation_labelmap = load_labelmap(annotation_labelmap)
         uploaded_image, uploaded_annotation = None, None
         upload_time = None
+        upload_retry_attempts = 0
         if image_path:
             t0 = time.time()
             try:
+                retry = Retry(num_retry_uploads, Exception)
                 uploaded_image = retry(
-                    num_retry_uploads,
-                    Exception,
                     rfapi.upload_image,
                     self.__api_key,
                     project_url,
@@ -492,6 +492,7 @@ def single_upload(
                     **kwargs,
                 )
                 image_id = uploaded_image["id"]
+                upload_retry_attempts = retry.retries
             except BaseException as e:
                 uploaded_image = {"error": e}
             finally:
@@ -522,6 +523,7 @@ def single_upload(
             "annotation": uploaded_annotation,
             "upload_time": upload_time,
             "annotation_time": annotation_time,
+            "upload_retry_attempts": upload_retry_attempts,
         }
 
     def _annotation_params(self, annotation_path):
diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
@@ -275,6 +275,7 @@ def upload_dataset(
         project_license: str = "MIT",
         project_type: str = "object-detection",
         batch_name=None,
+        num_retries=0,
     ):
         """
         Upload a dataset to Roboflow.
@@ -309,12 +310,17 @@ def _log_img_upload(image_path, uploadres):
             image = uploadres.get("image")
             upload_time_str = f"[{uploadres['upload_time']:.1f}s]" if uploadres.get("upload_time") else ""
             annotation_time_str = f"[{uploadres['annotation_time']:.1f}s]" if uploadres.get("annotation_time") else ""
+            retry_attempts = (
+                f" (with {uploadres['upload_retry_attempts']} retries)"
+                if uploadres.get("upload_retry_attempts", 0) > 0
+                else ""
+            )
             if img_duplicate:
-                msg = f"[DUPLICATE] {image_path} ({image_id}) {upload_time_str}"
+                msg = f"[DUPLICATE]{retry_attempts} {image_path} ({image_id}) {upload_time_str}"
             elif img_success:
-                msg = f"[UPLOADED] {image_path} ({image_id}) {upload_time_str}"
+                msg = f"[UPLOADED]{retry_attempts} {image_path} ({image_id}) {upload_time_str}"
             else:
-                msg = f"[ERR] {image_path} ({image}) {upload_time_str}"
+                msg = f"[ERR]{retry_attempts} {image_path} ({image}) {upload_time_str}"
             if annotation:
                 if annotation.get("success"):
                     msg += f" / annotations = OK {annotation_time_str}"
@@ -349,6 +355,7 @@ def _upload_image(imagedesc):
                     sequence_number=imagedesc.get("index"),
                     sequence_size=len(images),
                     batch_name=batch_name,
+                    num_retry_uploads=num_retries,
                 )
                 _log_img_upload(image_path, uploadres)
             except Exception as e:
diff --git a/roboflow/roboflowpy.py b/roboflow/roboflowpy.py
@@ -48,7 +48,11 @@ def import_dataset(args):
     rf = roboflow.Roboflow()
     workspace = rf.workspace(args.workspace)
     workspace.upload_dataset(
-        dataset_path=args.folder, project_name=args.project, num_workers=args.concurrency, batch_name=args.batch_name
+        dataset_path=args.folder,
+        project_name=args.project,
+        num_workers=args.concurrency,
+        batch_name=args.batch_name,
+        num_retries=args.num_retries,
     )
 
 
@@ -263,6 +267,9 @@ def _add_import_parser(subparsers):
         dest="batch_name",
         help="name of batch to upload to within project",
     )
+    import_parser.add_argument(
+        "-r", dest="num_retries", type=int, help="Retry failed uploads this many times (default=0)", default=0
+    )
     import_parser.set_defaults(func=import_dataset)
 
 
diff --git a/roboflow/util/folderparser.py b/roboflow/util/folderparser.py
@@ -116,6 +116,15 @@ def _filterIndividualAnnotations(image, annotation, format):
         if len(imgReferences) > 1:
             print(f"warning: found multiple image entries for image {image['file']} in {annotation['file']}")
         if imgReferences:
+            # workaround to make Annotations.js correctly identify this as coco in the backend
+            fake_annotation = {
+                "id": 999999999,
+                "image_id": 999999999,
+                "category_id": 0,
+                "area": 1,
+                "segmentation": [],
+                "iscrowd": 0,
+            }
             imgReference = imgReferences[0]
             _annotation = {
                 "name": "annotation.coco.json",
@@ -125,7 +134,8 @@ def _filterIndividualAnnotations(image, annotation, format):
                     "licenses": parsed["licenses"],
                     "categories": parsed["categories"],
                     "images": [imgReference],
-                    "annotations": [a for a in parsed["annotations"] if a["image_id"] == imgReference["id"]],
+                    "annotations": [a for a in parsed["annotations"] if a["image_id"] == imgReference["id"]]
+                    or [fake_annotation],
                 },
             }
             return _annotation
diff --git a/roboflow/util/general.py b/roboflow/util/general.py
@@ -7,17 +7,25 @@ def write_line(line):
     sys.stdout.flush()
 
 
-def retry(max_retries, retry_on, func, *args, **kwargs):
-    if not retry_on:
-        retry_on = (Exception,)
-    retries = 0
-    while retries <= max_retries:
-        try:
-            return func(*args, **kwargs)
-        except BaseException as e:
-            if isinstance(e, retry_on):
-                retries += 1
-                if retries > max_retries:
+class Retry:
+    def __init__(self, max_retries, retry_on):
+        self.max_retries = max_retries
+        self.retry_on = retry_on
+        self.retries = 0
+
+    def __call__(self, func, *args, **kwargs):
+        self.retries = 0
+        retry_on = self.retry_on
+        if not retry_on:
+            retry_on = (Exception,)
+        self.retries = 0
+        while self.retries <= self.max_retries:
+            try:
+                return func(*args, **kwargs)
+            except BaseException as e:
+                if isinstance(e, retry_on):
+                    self.retries += 1
+                    if self.retries > self.max_retries:
+                        raise
+                else:
                     raise
-            else:
-                raise
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -161,8 +161,7 @@ def setUp(self):
         # Upload image
         responses.add(
             responses.POST,
-            f"{API_URL}/dataset/{PROJECT_NAME}/upload?api_key={ROBOFLOW_API_KEY}"
-            f"&batch={DEFAULT_BATCH_NAME}",
+            f"{API_URL}/dataset/{PROJECT_NAME}/upload?api_key={ROBOFLOW_API_KEY}" f"&batch={DEFAULT_BATCH_NAME}",
             json={"duplicate": True, "id": "hbALkCFdNr9rssgOUXug"},
             status=200,
         )
diff --git a/tests/models/test_object_detection.py b/tests/models/test_object_detection.py
@@ -47,9 +47,7 @@ def setUp(self):
         self.version_id = f"{self.workspace}/{self.dataset_id}/{self.version}"
 
     def test_init_sets_attributes(self):
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         self.assertEqual(instance.id, self.version_id)
         # self.assertEqual(instance.api_url,
@@ -59,9 +57,7 @@ def test_init_sets_attributes(self):
     def test_predict_returns_prediction_group(self):
         print(self.api_url)
         image_path = "tests/images/rabbit.JPG"
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         responses.add(responses.POST, self.api_url, json=MOCK_RESPONSE)
 
@@ -72,9 +68,7 @@ def test_predict_returns_prediction_group(self):
     @responses.activate
     def test_predict_with_local_image_request(self):
         image_path = "tests/images/rabbit.JPG"
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         responses.add(responses.POST, self.api_url, json=MOCK_RESPONSE)
 
@@ -90,9 +84,7 @@ def test_predict_with_local_image_request(self):
     @responses.activate
     def test_predict_with_a_numpy_array_request(self):
         np_array = np.ones((100, 100, 1), dtype=np.uint8)
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         responses.add(responses.POST, self.api_url, json=MOCK_RESPONSE)
 
@@ -107,9 +99,7 @@ def test_predict_with_a_numpy_array_request(self):
 
     def test_predict_with_local_wrong_image_request(self):
         image_path = "tests/images/not_an_image.txt"
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
         self.assertRaises(UnidentifiedImageError, instance.predict, image_path)
 
     @responses.activate
@@ -119,9 +109,7 @@ def test_predict_with_hosted_image_request(self):
             **self._default_params,
             "image": image_path,
         }
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         # Mock the library validating that the URL is valid before sending to the API
         responses.add(responses.POST, self.api_url, json=MOCK_RESPONSE)
@@ -140,9 +128,7 @@ def test_predict_with_confidence_request(self):
         confidence = "100"
         image_path = "tests/images/rabbit.JPG"
         expected_params = {**self._default_params, "confidence": confidence}
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         responses.add(responses.POST, self.api_url, json=MOCK_RESPONSE)
 
@@ -160,9 +146,7 @@ def test_predict_with_non_200_response_raises_http_error(self):
         image_path = "tests/images/rabbit.JPG"
         responses.add(responses.POST, self.api_url, status=403)
 
-        instance = ObjectDetectionModel(
-            self.api_key, self.version_id, version=self.version
-        )
+        instance = ObjectDetectionModel(self.api_key, self.version_id, version=self.version)
 
         with self.assertRaises(HTTPError):
             instance.predict(image_path)