Skip to content

Commit

Permalink
fix objdet confusion matrix OOM (#797)
Browse files Browse the repository at this point in the history
  • Loading branch information
czaloom authored Oct 11, 2024
1 parent 2a49693 commit 667a31a
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 46 deletions.
24 changes: 22 additions & 2 deletions lite/tests/object_detection/test_stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ def _generate_random_detections(
n_detections: int, n_boxes: int, labels: str
) -> list[Detection]:
def bbox(is_prediction):
xmin, ymin = uniform(0, 10), uniform(0, 10)
xmax, ymax = uniform(xmin, 15), uniform(ymin, 15)
width, height = 50, 50
xmin, ymin = uniform(0, 1000), uniform(0, 1000)
xmax, ymax = uniform(xmin, xmin + width), uniform(ymin, ymin + height)
kw = {"scores": [uniform(0, 1)]} if is_prediction else {}
return BoundingBox(
xmin,
Expand Down Expand Up @@ -81,3 +82,22 @@ def test_fuzz_detections_with_filtering():
score_thresholds=[0.25, 0.75],
filter_=filter_,
)


def test_fuzz_confusion_matrix():
    """
    Fuzz the evaluator with randomly generated detections.

    Generates detections via ``_generate_random_detections(1000, 30, "abcde")``,
    checks the metadata reported by the finalized evaluator, and then runs an
    evaluation at two IoU thresholds and one score threshold.
    """
    detections = _generate_random_detections(1000, 30, "abcde")

    loader = DataLoader()
    loader.add_bounding_boxes(detections)
    evaluator = loader.finalize()

    expected_metadata = {
        "ignored_prediction_labels": [],
        "missing_prediction_labels": [],
        "n_datums": 1000,
        "n_groundtruths": 30000,
        "n_predictions": 30000,
        "n_labels": 5,
    }
    assert evaluator.metadata == expected_metadata

    # No assertion on the result: the test passes if evaluation returns
    # without raising.
    evaluator.evaluate(iou_thresholds=[0.25, 0.75], score_thresholds=[0.5])
99 changes: 55 additions & 44 deletions lite/valor_lite/object_detection/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def _compute_ranked_pairs_for_datum(

# find best fits for prediction
mask_label_match = data[:, 4] == data[:, 5]
matched_predicitons = np.unique(data[mask_label_match, 2].astype(int))
matched_predicitons = np.unique(data[mask_label_match, 2].astype(np.int32))
mask_unmatched_predictions = ~np.isin(data[:, 2], matched_predicitons)
data = data[mask_label_match | mask_unmatched_predictions]

Expand Down Expand Up @@ -333,7 +333,7 @@ def compute_metrics(
average_recall = np.zeros((n_scores, n_labels))
counts = np.zeros((n_ious, n_scores, n_labels, 7))

pd_labels = data[:, 5].astype(int)
pd_labels = data[:, 5].astype(np.int32)
scores = data[:, 6]
unique_pd_labels, unique_pd_indices = np.unique(
pd_labels, return_index=True
Expand Down Expand Up @@ -383,17 +383,19 @@ def compute_metrics(
true_positives_mask[mask_tp_inner] = mask_gt_unique

# calculate intermediates
pd_count = np.bincount(pd_labels, minlength=n_labels).astype(float)
pd_count = np.bincount(pd_labels, minlength=n_labels).astype(
np.float64
)
tp_count = np.bincount(
pd_labels,
weights=true_positives_mask,
minlength=n_labels,
).astype(float)
).astype(np.float64)

fp_count = np.bincount(
pd_labels[mask_fp_inner],
minlength=n_labels,
).astype(float)
).astype(np.float64)

fn_count = np.bincount(
pd_labels[mask_fn_inner],
Expand Down Expand Up @@ -476,7 +478,7 @@ def compute_metrics(
where=running_gt_count > 1e-9,
out=recall,
)
recall_index = np.floor(recall * 100.0).astype(int)
recall_index = np.floor(recall * 100.0).astype(np.int32)

# bin precision-recall curve
pr_curve = np.zeros((n_ious, n_labels, 101, 2))
Expand Down Expand Up @@ -582,7 +584,7 @@ def _count_with_examples(
Counts for each unique label index.
"""
unique_rows, indices = np.unique(
data.astype(int)[:, unique_idx],
data.astype(np.int32)[:, unique_idx],
return_index=True,
axis=0,
)
Expand All @@ -593,6 +595,35 @@ def _count_with_examples(
return examples, labels, counts


def _isin(
    data: NDArray[np.int32],
    subset: NDArray[np.int32],
) -> NDArray[np.bool_]:
    """
    Creates a mask of rows that exist within the subset.

    Each two-column row is packed into a single 64-bit key (column 0 in the
    upper 32 bits, column 1 in the lower 32 bits) so that row membership can
    be answered with one 1-D ``np.isin`` call rather than a pairwise
    row-by-row comparison.

    Parameters
    ----------
    data : NDArray[np.int32]
        An array with shape (N, 2).
    subset : NDArray[np.int32]
        An array with shape (M, 2) where N >= M.

    Returns
    -------
    NDArray[np.bool_]
        Returns a bool mask with shape (N,).
    """

    def _pack_rows(rows):
        # Column 1 is reinterpreted as unsigned so that a negative value
        # only occupies the low 32 bits and cannot sign-extend into the
        # bits that hold column 0.
        upper = rows[:, 0].astype(np.int64) << 32
        lower = rows[:, 1].astype(np.uint32)
        return upper | lower

    return np.isin(_pack_rows(data), _pack_rows(subset), assume_unique=False)


def compute_confusion_matrix(
data: NDArray[np.float64],
label_metadata: NDArray[np.int32],
Expand Down Expand Up @@ -666,20 +697,16 @@ def compute_confusion_matrix(
mask_gt_pd_match = mask_gt_pd_exists & mask_label_match
mask_gt_pd_mismatch = mask_gt_pd_exists & ~mask_label_match

groundtruths = data[:, [0, 1]].astype(int)
predictions = data[:, [0, 2]].astype(int)
groundtruths = data[:, [0, 1]].astype(np.int32)
predictions = data[:, [0, 2]].astype(np.int32)
for iou_idx in range(n_ious):
mask_iou_threshold = data[:, 3] >= iou_thresholds[iou_idx]
mask_iou = mask_iou_nonzero & mask_iou_threshold

groundtruths_passing_ious = np.unique(groundtruths[mask_iou], axis=0)
mask_groundtruths_with_passing_ious = (
(
groundtruths.reshape(-1, 1, 2)
== groundtruths_passing_ious.reshape(1, -1, 2)
)
.all(axis=2)
.any(axis=1)
mask_groundtruths_with_passing_ious = _isin(
data=groundtruths,
subset=groundtruths_passing_ious,
)
mask_groundtruths_without_passing_ious = (
~mask_groundtruths_with_passing_ious & mask_gt_exists
Expand All @@ -688,13 +715,9 @@ def compute_confusion_matrix(
predictions_with_passing_ious = np.unique(
predictions[mask_iou], axis=0
)
mask_predictions_with_passing_ious = (
(
predictions.reshape(-1, 1, 2)
== predictions_with_passing_ious.reshape(1, -1, 2)
)
.all(axis=2)
.any(axis=1)
mask_predictions_with_passing_ious = _isin(
data=predictions,
subset=predictions_with_passing_ious,
)
mask_predictions_without_passing_ious = (
~mask_predictions_with_passing_ious & mask_pd_exists
Expand All @@ -707,13 +730,9 @@ def compute_confusion_matrix(
groundtruths_with_passing_score = np.unique(
groundtruths[mask_iou & mask_score], axis=0
)
mask_groundtruths_with_passing_score = (
(
groundtruths.reshape(-1, 1, 2)
== groundtruths_with_passing_score.reshape(1, -1, 2)
)
.all(axis=2)
.any(axis=1)
mask_groundtruths_with_passing_score = _isin(
data=groundtruths,
subset=groundtruths_with_passing_score,
)
mask_groundtruths_without_passing_score = (
~mask_groundtruths_with_passing_score & mask_gt_exists
Expand All @@ -736,21 +755,13 @@ def compute_confusion_matrix(
)

# filter out true-positives from misclf and misprd
mask_gts_with_tp_override = (
(
data[mask_misclf][:, [0, 1]].reshape(-1, 1, 2)
== data[mask_tp][:, [0, 1]].reshape(1, -1, 2)
)
.all(axis=2)
.any(axis=1)
mask_gts_with_tp_override = _isin(
data=groundtruths[mask_misclf],
subset=groundtruths[mask_tp],
)
mask_pds_with_tp_override = (
(
data[mask_misclf][:, [0, 2]].reshape(-1, 1, 2)
== data[mask_tp][:, [0, 2]].reshape(1, -1, 2)
)
.all(axis=2)
.any(axis=1)
mask_pds_with_tp_override = _isin(
data=predictions[mask_misclf],
subset=predictions[mask_tp],
)
mask_misprd[mask_misclf] |= (
~mask_gts_with_tp_override & mask_pds_with_tp_override
Expand Down

0 comments on commit 667a31a

Please sign in to comment.