Skip to content

Commit 2a49693

Browse files
authored
Add scores to PrecisionRecallCurve (#796)
1 parent f1546ee commit 2a49693

File tree

7 files changed

+189
-67
lines changed

7 files changed

+189
-67
lines changed

lite/benchmarks/benchmark_classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def run_benchmarking_analysis(
211211
)
212212

213213
# evaluate
214-
eval_time, _ = time_it(evaluator.evaluate)()
214+
eval_time, _ = time_it(evaluator.compute_precision_recall)()
215215
if eval_time > evaluation_timeout and evaluation_timeout != -1:
216216
raise TimeoutError(
217217
f"Base evaluation timed out with {evaluator.n_datums} datums."

lite/benchmarks/benchmark_objdet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def run_benchmarking_analysis(
322322
)
323323

324324
# evaluate - base metrics only
325-
eval_time, metrics = time_it(evaluator.evaluate)()
325+
eval_time, metrics = time_it(evaluator.compute_precision_recall)()
326326
if eval_time > evaluation_timeout and evaluation_timeout != -1:
327327
raise TimeoutError(
328328
f"Base evaluation timed out with {evaluator.n_datums} datums."

lite/examples/object-detection.ipynb

Lines changed: 42 additions & 12 deletions
Large diffs are not rendered by default.

lite/tests/object_detection/test_pr_curve.py

Lines changed: 97 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,15 @@ def test_pr_curve_simple():
3131
score_thresholds=score_thresholds,
3232
)
3333

34-
assert pr_curve.shape == (2, 1, 101)
35-
assert np.isclose(pr_curve[0][0], 1.0).all()
36-
assert np.isclose(pr_curve[1][0], 1 / 3).all()
34+
assert pr_curve.shape == (2, 1, 101, 2)
35+
36+
# test precision values
37+
assert np.isclose(pr_curve[0, 0, :, 0], 1.0).all()
38+
assert np.isclose(pr_curve[1, 0, :, 0], 1 / 3).all()
39+
40+
# test score values
41+
assert np.isclose(pr_curve[0, 0, :, 1], 0.95).all()
42+
assert np.isclose(pr_curve[1, 0, :, 1], 0.65).all()
3743

3844

3945
def test_pr_curve_using_torch_metrics_example(
@@ -59,111 +65,162 @@ def test_pr_curve_using_torch_metrics_example(
5965
as_dict=True,
6066
)
6167

62-
# AP = 1.0
63-
a = [1.0 for _ in range(101)]
64-
65-
# AP = 0.505
66-
b = [1.0 for _ in range(51)] + [0.0 for _ in range(50)]
67-
68-
# AP = 0.791
69-
c = (
70-
[1.0 for _ in range(71)]
71-
+ [8 / 9 for _ in range(10)]
72-
+ [0.0 for _ in range(20)]
73-
)
74-
75-
# AP = 0.722
76-
d = (
77-
[1.0 for _ in range(41)]
78-
+ [0.8 for _ in range(40)]
79-
+ [0.0 for _ in range(20)]
80-
)
81-
82-
# AP = 0.576
83-
e = (
84-
[1.0 for _ in range(41)]
85-
+ [0.8571428571428571 for _ in range(20)]
86-
+ [0.0 for _ in range(40)]
87-
)
88-
8968
# test PrecisionRecallCurve
9069
actual_metrics = [m for m in metrics[MetricType.PrecisionRecallCurve]]
9170
expected_metrics = [
9271
{
9372
"type": "PrecisionRecallCurve",
94-
"value": a,
73+
"value": {
74+
"precisions": [1.0 for _ in range(101)],
75+
"scores": (
76+
[0.953 for _ in range(21)]
77+
+ [0.805 for _ in range(20)]
78+
+ [0.611 for _ in range(20)]
79+
+ [0.407 for _ in range(20)]
80+
+ [0.335 for _ in range(20)]
81+
),
82+
},
9583
"parameters": {
9684
"iou_threshold": 0.5,
9785
"label": "0",
9886
},
9987
},
10088
{
10189
"type": "PrecisionRecallCurve",
102-
"value": d,
90+
"value": {
91+
"precisions": (
92+
[1.0 for _ in range(41)]
93+
+ [0.8 for _ in range(40)]
94+
+ [0.0 for _ in range(20)]
95+
),
96+
"scores": (
97+
[0.953 for _ in range(21)]
98+
+ [0.805 for _ in range(20)]
99+
+ [0.407 for _ in range(20)]
100+
+ [0.335 for _ in range(20)]
101+
+ [0.0 for _ in range(20)]
102+
),
103+
},
103104
"parameters": {
104105
"iou_threshold": 0.75,
105106
"label": "0",
106107
},
107108
},
108109
{
109110
"type": "PrecisionRecallCurve",
110-
"value": a,
111+
"value": {
112+
"precisions": [1.0 for _ in range(101)],
113+
"scores": [0.3 for _ in range(101)],
114+
},
111115
"parameters": {
112116
"iou_threshold": 0.5,
113117
"label": "1",
114118
},
115119
},
116120
{
117121
"type": "PrecisionRecallCurve",
118-
"value": a,
122+
"value": {
123+
"precisions": [1.0 for _ in range(101)],
124+
"scores": [0.3 for _ in range(101)],
125+
},
119126
"parameters": {
120127
"iou_threshold": 0.75,
121128
"label": "1",
122129
},
123130
},
124131
{
125132
"type": "PrecisionRecallCurve",
126-
"value": b,
133+
"value": {
134+
"precisions": [1.0 for _ in range(51)]
135+
+ [0.0 for _ in range(50)],
136+
"scores": [0.726 for _ in range(51)]
137+
+ [0.0 for _ in range(50)],
138+
},
127139
"parameters": {
128140
"iou_threshold": 0.5,
129141
"label": "2",
130142
},
131143
},
132144
{
133145
"type": "PrecisionRecallCurve",
134-
"value": b,
146+
"value": {
147+
"precisions": [1.0 for _ in range(51)]
148+
+ [0.0 for _ in range(50)],
149+
"scores": [0.726 for _ in range(51)]
150+
+ [0.0 for _ in range(50)],
151+
},
135152
"parameters": {
136153
"iou_threshold": 0.75,
137154
"label": "2",
138155
},
139156
},
140157
{
141158
"type": "PrecisionRecallCurve",
142-
"value": a,
159+
"value": {
160+
"precisions": [1.0 for _ in range(101)],
161+
"scores": [0.546 for _ in range(51)]
162+
+ [0.236 for _ in range(50)],
163+
},
143164
"parameters": {
144165
"iou_threshold": 0.5,
145166
"label": "4",
146167
},
147168
},
148169
{
149170
"type": "PrecisionRecallCurve",
150-
"value": a,
171+
"value": {
172+
"precisions": [1.0 for _ in range(101)],
173+
"scores": [0.546 for _ in range(51)]
174+
+ [0.236 for _ in range(50)],
175+
},
151176
"parameters": {
152177
"iou_threshold": 0.75,
153178
"label": "4",
154179
},
155180
},
156181
{
157182
"type": "PrecisionRecallCurve",
158-
"value": c,
183+
"value": {
184+
"precisions": (
185+
[1.0 for _ in range(71)]
186+
+ [8 / 9 for _ in range(10)]
187+
+ [0.0 for _ in range(20)]
188+
),
189+
"scores": (
190+
[0.883 for _ in range(11)]
191+
+ [0.782 for _ in range(10)]
192+
+ [0.561 for _ in range(10)]
193+
+ [0.532 for _ in range(10)]
194+
+ [0.349 for _ in range(10)]
195+
+ [0.271 for _ in range(10)]
196+
+ [0.204 for _ in range(10)]
197+
+ [0.202 for _ in range(10)]
198+
+ [0.0 for _ in range(20)]
199+
),
200+
},
159201
"parameters": {
160202
"iou_threshold": 0.5,
161203
"label": "49",
162204
},
163205
},
164206
{
165207
"type": "PrecisionRecallCurve",
166-
"value": e,
208+
"value": {
209+
"precisions": (
210+
[1.0 for _ in range(41)]
211+
+ [0.8571428571428571 for _ in range(20)]
212+
+ [0.0 for _ in range(40)]
213+
),
214+
"scores": (
215+
[0.883 for _ in range(11)]
216+
+ [0.782 for _ in range(10)]
217+
+ [0.561 for _ in range(10)]
218+
+ [0.532 for _ in range(10)]
219+
+ [0.271 for _ in range(10)]
220+
+ [0.204 for _ in range(10)]
221+
+ [0.0 for _ in range(40)]
222+
),
223+
},
167224
"parameters": {
168225
"iou_threshold": 0.75,
169226
"label": "49",

lite/valor_lite/object_detection/computation.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -334,15 +334,17 @@ def compute_metrics(
334334
counts = np.zeros((n_ious, n_scores, n_labels, 7))
335335

336336
pd_labels = data[:, 5].astype(int)
337-
unique_pd_labels = np.unique(pd_labels)
337+
scores = data[:, 6]
338+
unique_pd_labels, unique_pd_indices = np.unique(
339+
pd_labels, return_index=True
340+
)
338341
gt_count = label_metadata[:, 0]
339342
running_total_count = np.zeros(
340343
(n_ious, n_rows),
341344
dtype=np.float64,
342345
)
343346
running_tp_count = np.zeros_like(running_total_count)
344347
running_gt_count = np.zeros_like(running_total_count)
345-
pr_curve = np.zeros((n_ious, n_labels, 101))
346348

347349
mask_score_nonzero = data[:, 6] > 1e-9
348350
mask_gt_exists = data[:, 1] >= 0.0
@@ -475,20 +477,42 @@ def compute_metrics(
475477
out=recall,
476478
)
477479
recall_index = np.floor(recall * 100.0).astype(int)
480+
481+
# bin precision-recall curve
482+
pr_curve = np.zeros((n_ious, n_labels, 101, 2))
478483
for iou_idx in range(n_ious):
479484
p = precision[iou_idx]
480485
r = recall_index[iou_idx]
481-
pr_curve[iou_idx, pd_labels, r] = np.maximum(
482-
pr_curve[iou_idx, pd_labels, r], p
486+
pr_curve[iou_idx, pd_labels, r, 0] = np.maximum(
487+
pr_curve[iou_idx, pd_labels, r, 0],
488+
p,
489+
)
490+
pr_curve[iou_idx, pd_labels, r, 1] = np.maximum(
491+
pr_curve[iou_idx, pd_labels, r, 1],
492+
scores,
483493
)
484494

485495
# calculate average precision
486-
running_max = np.zeros((n_ious, n_labels))
496+
running_max_precision = np.zeros((n_ious, n_labels))
497+
running_max_score = np.zeros((n_labels))
487498
for recall in range(100, -1, -1):
488-
precision = pr_curve[:, :, recall]
489-
running_max = np.maximum(precision, running_max)
490-
average_precision += running_max
491-
pr_curve[:, :, recall] = running_max
499+
500+
# running max precision
501+
running_max_precision = np.maximum(
502+
pr_curve[:, :, recall, 0],
503+
running_max_precision,
504+
)
505+
pr_curve[:, :, recall, 0] = running_max_precision
506+
507+
# running max score
508+
running_max_score = np.maximum(
509+
pr_curve[:, :, recall, 1],
510+
running_max_score,
511+
)
512+
pr_curve[:, :, recall, 1] = running_max_score
513+
514+
average_precision += running_max_precision
515+
492516
average_precision = average_precision / 101.0
493517

494518
# calculate average recall

lite/valor_lite/object_detection/manager.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,12 @@ def compute_precision_recall(
595595

596596
metrics[MetricType.PrecisionRecallCurve] = [
597597
PrecisionRecallCurve(
598-
precision=pr_curves[iou_idx][label_idx].astype(float).tolist(),
598+
precisions=pr_curves[iou_idx, label_idx, :, 0]
599+
.astype(float)
600+
.tolist(),
601+
scores=pr_curves[iou_idx, label_idx, :, 1]
602+
.astype(float)
603+
.tolist(),
599604
iou_threshold=iou_threshold,
600605
label=label,
601606
)

lite/valor_lite/object_detection/metric.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -591,8 +591,10 @@ class PrecisionRecallCurve:
591591
592592
Attributes
593593
----------
594-
precision : list[float]
594+
precisions : list[float]
595595
Interpolated precision values corresponding to recalls at 0.0, 0.01, ..., 1.0.
596+
scores : list[float]
597+
Maximum prediction score for each point on the interpolated curve.
596598
iou_threshold : float
597599
The Intersection over Union (IoU) threshold used to determine true positives.
598600
label : str
@@ -606,14 +608,18 @@ class PrecisionRecallCurve:
606608
Converts the instance to a dictionary representation.
607609
"""
608610

609-
precision: list[float]
611+
precisions: list[float]
612+
scores: list[float]
610613
iou_threshold: float
611614
label: str
612615

613616
def to_metric(self) -> Metric:
614617
return Metric(
615618
type=type(self).__name__,
616-
value=self.precision,
619+
value={
620+
"precisions": self.precisions,
621+
"scores": self.scores,
622+
},
617623
parameters={
618624
"iou_threshold": self.iou_threshold,
619625
"label": self.label,

0 commit comments

Comments (0)