Commit

Remove metrics_to_return (#788)

czaloom authored Oct 10, 2024
1 parent f4401d7 commit 67776e9
Showing 15 changed files with 543 additions and 550 deletions.
13 changes: 8 additions & 5 deletions lite/benchmarks/benchmark_classification.py
@@ -7,7 +7,7 @@

 import requests
 from tqdm import tqdm
-from valor_lite.classification import Classification, DataLoader, MetricType
+from valor_lite.classification import Classification, DataLoader


 def _convert_valor_dicts_into_Classification(gt_dict: dict, pd_dict: dict):
@@ -217,8 +217,10 @@ def run_benchmarking_analysis(
 f"Base evaluation timed out with {evaluator.n_datums} datums."
 )

-detail_no_examples_time, _ = time_it(evaluator.evaluate)(
-metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
+detail_no_examples_time, _ = time_it(
+evaluator.compute_confusion_matrix
+)(
+number_of_examples=0,
 )
 if (
 detail_no_examples_time > evaluation_timeout
@@ -228,8 +230,9 @@
 f"Base evaluation timed out with {evaluator.n_datums} datums."
 )

-detail_three_examples_time, _ = time_it(evaluator.evaluate)(
-metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
+detail_three_examples_time, _ = time_it(
+evaluator.compute_confusion_matrix
+)(
+number_of_examples=3,
 )
 if (
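
Taken together, this file's hunks replace the old `evaluator.evaluate(metrics_to_return=[...])` call with the dedicated `compute_confusion_matrix` method. Below is a minimal sketch of the post-change classification call, assuming you already have `Classification` objects to load; the helper function name is illustrative, and only names and keyword arguments that appear in this commit's hunks are used.

    from valor_lite.classification import Classification, DataLoader

    def classification_confusion_matrix(classifications: list[Classification]):
        # Illustrative sketch, not code from this repository.
        loader = DataLoader()
        loader.add_data(classifications)   # ground truths and predictions
        evaluator = loader.finalize()

        # Pre-#788 pattern (removed above):
        #   evaluator.evaluate(
        #       metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
        #   )
        # Post-#788, the confusion matrix has its own entry point:
        return evaluator.compute_confusion_matrix(
            number_of_examples=0,  # as exercised in the benchmark above
            as_dict=True,          # plain dicts, as used in the updated tests
        )
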
20 changes: 4 additions & 16 deletions lite/benchmarks/benchmark_objdet.py
@@ -8,12 +8,7 @@

 import requests
 from tqdm import tqdm
-from valor_lite.object_detection import (
-BoundingBox,
-DataLoader,
-Detection,
-MetricType,
-)
+from valor_lite.object_detection import BoundingBox, DataLoader, Detection


 def _get_bbox_extrema(
@@ -335,12 +330,9 @@ def run_benchmarking_analysis(

 # evaluate - base metrics + detailed counts with no samples
 detailed_counts_time_no_samples, metrics = time_it(
-evaluator.evaluate
+evaluator.compute_confusion_matrix
 )(
-[
-MetricType.ConfusionMatrix,
-*MetricType.base_metrics(),
-]
+number_of_examples=0,
 )
 if (
 detailed_counts_time_no_samples > evaluation_timeout
@@ -352,12 +344,8 @@

 # evaluate - base metrics + detailed counts with 3 samples
 detailed_counts_time_three_samples, metrics = time_it(
-evaluator.evaluate
+evaluator.compute_confusion_matrix
 )(
-[
-MetricType.ConfusionMatrix,
-*MetricType.base_metrics(),
-],
 number_of_examples=3,
 )
 if (
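
The object-detection benchmark gets the same migration, with `iou_thresholds` added to the call. A hedged sketch of the equivalent standalone call, again using only names and keyword arguments visible in this commit (the wrapper function is mine):

    from valor_lite.object_detection import DataLoader, Detection

    def objdet_confusion_matrix(detections: list[Detection]):
        # Illustrative sketch, not code from this repository.
        loader = DataLoader()
        loader.add_bounding_boxes(detections)
        evaluator = loader.finalize()

        # evaluate() still covers the base metrics (AP, AR, ...);
        # the confusion matrix now comes from its own method.
        return evaluator.compute_confusion_matrix(
            iou_thresholds=[0.5],
            score_thresholds=[0.25],
            number_of_examples=3,
            as_dict=True,
        )
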
3 changes: 1 addition & 2 deletions lite/examples/object-detection.ipynb
@@ -234,7 +234,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"100%|██████████| 5000/5000 [00:00<00:00, 8290.40it/s]\n"
+"100%|██████████| 5000/5000 [00:00<00:00, 8616.97it/s]\n"
 ]
 }
 ],
@@ -521,7 +521,6 @@
 " score_thresholds=[0.25],\n",
 " iou_thresholds=[0.25],\n",
 " number_of_examples=3,\n",
-" metrics_to_return=[MetricType.ConfusionMatrix]\n",
 ")[MetricType.ConfusionMatrix]\n",
 "metric = matrices[0]\n",
 "\n",
55 changes: 27 additions & 28 deletions lite/examples/tabular_classification.ipynb

Large diffs are not rendered by default.

26 changes: 7 additions & 19 deletions lite/tests/classification/test_confusion_matrix.py
@@ -1,5 +1,5 @@
 import numpy as np
-from valor_lite.classification import Classification, DataLoader, MetricType
+from valor_lite.classification import Classification, DataLoader
 from valor_lite.classification.computation import compute_confusion_matrix


@@ -114,14 +114,12 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
 "missing_prediction_labels": [],
 }

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.25, 0.75],
 number_of_examples=1,
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -190,13 +188,11 @@ def test_confusion_matrix_unit(
 loader.add_data(classifications_from_api_unit_tests)
 evaluator = loader.finalize()

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.5],
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -233,14 +229,12 @@ def test_confusion_matrix_with_animal_example(
 loader.add_data(classifications_animal_example)
 evaluator = loader.finalize()

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.5],
 number_of_examples=6,
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -304,14 +298,12 @@ def test_confusion_matrix_with_color_example(
 loader.add_data(classifications_color_example)
 evaluator = loader.finalize()

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.5],
 number_of_examples=6,
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -385,14 +377,12 @@ def test_confusion_matrix_multiclass(
 "n_predictions": 15,
 }

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.05, 0.5, 0.85],
 number_of_examples=5,
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -517,15 +507,13 @@ def test_confusion_matrix_without_hardmax_animal_example(
 "missing_prediction_labels": [],
 }

-metrics = evaluator.evaluate(
-metrics_to_return=[MetricType.ConfusionMatrix],
+actual_metrics = evaluator.compute_confusion_matrix(
 score_thresholds=[0.05, 0.4, 0.5],
 number_of_examples=6,
 hardmax=False,
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
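
The updated classification tests no longer pull the matrix out of an `evaluate()` result dictionary; they compare the return value of `compute_confusion_matrix(..., as_dict=True)` directly against a list of expected dicts. A condensed sketch of that assertion pattern, assuming an `evaluator` built from a `DataLoader` as in the hunks above (the helper name and the elided `expected_metrics` are mine):

    def check_confusion_matrix(evaluator, expected_metrics: list[dict]) -> None:
        # Containment check mirroring the updated tests (illustrative helper).
        actual_metrics = evaluator.compute_confusion_matrix(
            score_thresholds=[0.25, 0.75],  # thresholds taken from the basic test above
            number_of_examples=1,
            as_dict=True,
        )
        for m in actual_metrics:
            assert m in expected_metrics
        for m in expected_metrics:
            assert m in actual_metrics
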
2 changes: 1 addition & 1 deletion lite/tests/object_detection/test_average_precision.py
@@ -839,7 +839,7 @@ def test_ap_true_positive_deassignment(
 as_dict=True,
 )

-assert len(metrics) == 14
+assert len(metrics) == 15

 # test AP
 actual_metrics = [m for m in metrics[MetricType.AP]]
2 changes: 1 addition & 1 deletion lite/tests/object_detection/test_average_recall.py
@@ -491,7 +491,7 @@ def test_ar_true_positive_deassignment(
 as_dict=True,
 )

-assert len(metrics) == 14
+assert len(metrics) == 15

 # test AR
 actual_metrics = [m for m in metrics[MetricType.AR]]
35 changes: 9 additions & 26 deletions lite/tests/object_detection/test_confusion_matrix.py
@@ -1,18 +1,11 @@
 import numpy as np
-from valor_lite.object_detection import (
-DataLoader,
-Detection,
-Evaluator,
-MetricType,
-)
+from valor_lite.object_detection import DataLoader, Detection, Evaluator
 from valor_lite.object_detection.computation import compute_confusion_matrix


 def test_confusion_matrix_no_data():
 evaluator = Evaluator()
-curves = evaluator._compute_confusion_matrix(
-data=np.array([]),
-label_metadata=np.array([]),
+curves = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.5],
 score_thresholds=[0.5],
 number_of_examples=0,
@@ -441,11 +434,10 @@ def test_confusion_matrix(
 assert evaluator.n_groundtruths == 4
 assert evaluator.n_predictions == 4

-metrics = evaluator.evaluate(
+actual_metrics = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.5],
 score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
 number_of_examples=1,
-metrics_to_return=[MetricType.ConfusionMatrix],
+as_dict=True,
 )

@@ -455,7 +447,6 @@
 rect4_dict = evaluator._convert_example_to_dict(np.array(rect4))
 rect5_dict = evaluator._convert_example_to_dict(np.array(rect5))

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -798,15 +789,13 @@ def test_confusion_matrix(

 # test at lower IoU threshold

-metrics = evaluator.evaluate(
+actual_metrics = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.45],
 score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
 number_of_examples=1,
-metrics_to_return=[MetricType.ConfusionMatrix],
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -1185,17 +1174,15 @@ def test_confusion_matrix_using_torch_metrics_example(
 assert evaluator.n_groundtruths == 20
 assert evaluator.n_predictions == 19

-metrics = evaluator.evaluate(
+actual_metrics = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.5, 0.9],
 score_thresholds=[0.05, 0.25, 0.35, 0.55, 0.75, 0.8, 0.85, 0.95],
 number_of_examples=0,
-metrics_to_return=[MetricType.ConfusionMatrix],
+as_dict=True,
 )

-assert len(metrics[MetricType.ConfusionMatrix]) == 16
+assert len(actual_metrics) == 16

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -1581,17 +1568,15 @@ def test_confusion_matrix_fp_hallucination_edge_case(
 assert evaluator.n_groundtruths == 2
 assert evaluator.n_predictions == 2

-metrics = evaluator.evaluate(
+actual_metrics = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.5],
 score_thresholds=[0.5, 0.85],
 number_of_examples=1,
-metrics_to_return=[MetricType.ConfusionMatrix],
+as_dict=True,
 )

-assert len(metrics[MetricType.ConfusionMatrix]) == 2
+assert len(actual_metrics) == 2

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
@@ -1732,15 +1717,13 @@ def test_confusion_matrix_ranked_pair_ordering(
 "n_predictions": 4,
 }

-metrics = evaluator.evaluate(
+actual_metrics = evaluator.compute_confusion_matrix(
 iou_thresholds=[0.5],
 score_thresholds=[0.0],
 number_of_examples=0,
-metrics_to_return=[MetricType.ConfusionMatrix],
+as_dict=True,
 )

-actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
 expected_metrics = [
 {
 "type": "ConfusionMatrix",
22 changes: 0 additions & 22 deletions lite/tests/object_detection/test_evaluator.py
@@ -86,25 +86,3 @@ def test_no_predictions(detections_no_predictions):
 assert m in expected_metrics
 for m in expected_metrics:
 assert m in actual_metrics
-
-
-def test_metrics_to_return(basic_detections_first_class: list[Detection]):
-
-loader = DataLoader()
-loader.add_bounding_boxes(basic_detections_first_class)
-evaluator = loader.finalize()
-
-metrics_to_return = [
-MetricType.AP,
-MetricType.AR,
-]
-metrics = evaluator.evaluate(metrics_to_return)
-assert metrics.keys() == set(metrics_to_return)
-
-metrics_to_return = [
-MetricType.AP,
-MetricType.AR,
-MetricType.ConfusionMatrix,
-]
-metrics = evaluator.evaluate(metrics_to_return)
-assert metrics.keys() == set(metrics_to_return)
8 changes: 4 additions & 4 deletions lite/tests/object_detection/test_filtering.py
@@ -568,10 +568,10 @@ def test_filtering_all_detections(four_detections: list[Detection]):
 metrics = evaluator.evaluate(
 iou_thresholds=[0.5],
 filter_=filter_,
-metrics_to_return=[
-*MetricType.base_metrics(),
-MetricType.ConfusionMatrix,
-],
 )
+evaluator.compute_confusion_matrix(
+iou_thresholds=[0.5],
+filter_=filter_,
+)

 actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
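
Because `evaluate()` no longer accepts `metrics_to_return`, a filtered run now takes two calls under the same `filter_`: one for the base metrics and one for the confusion matrix. A small sketch of that split, assuming `evaluator` and `filter_` are built as in `test_filtering_all_detections` (the wrapper function is mine):

    def filtered_metrics(evaluator, filter_):
        # Illustrative sketch of the two-call pattern after this change.
        base = evaluator.evaluate(
            iou_thresholds=[0.5],
            filter_=filter_,
        )
        confusion = evaluator.compute_confusion_matrix(
            iou_thresholds=[0.5],
            filter_=filter_,
        )
        return base, confusion
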
0 comments on commit 67776e9