From 7252c45a4138884d816ee76eefa24e049b771be0 Mon Sep 17 00:00:00 2001 From: Charles Zaloom <38677807+czaloom@users.noreply.github.com> Date: Thu, 29 Aug 2024 15:35:09 -0400 Subject: [PATCH] Unaccounted Time in Valor-Core Benchmark (#727) --- .../workflows/core-benchmark-evaluations.yml | 4 +- ...{benchmark_script.py => benchmark_core.py} | 349 ++++++------ .../object-detection/benchmark_manager.py | 528 ++++++++++++++++++ 3 files changed, 687 insertions(+), 194 deletions(-) rename core/benchmarks/object-detection/{benchmark_script.py => benchmark_core.py} (52%) create mode 100644 core/benchmarks/object-detection/benchmark_manager.py diff --git a/.github/workflows/core-benchmark-evaluations.yml b/.github/workflows/core-benchmark-evaluations.yml index e145b6ec6..705f740bd 100644 --- a/.github/workflows/core-benchmark-evaluations.yml +++ b/.github/workflows/core-benchmark-evaluations.yml @@ -28,11 +28,11 @@ jobs: echo "$BENCHMARK_RESULTS" working-directory: ./core/benchmarks/classification - name: run object detection benchmarks - run: python benchmark_script.py + run: python benchmark_manager.py working-directory: ./core/benchmarks/object-detection - name: print object detection results run: | - export BENCHMARK_RESULTS=$(python -c "import os;import json;print(json.dumps(json.load(open('results.json', 'r')), indent=4));") + export BENCHMARK_RESULTS=$(python -c "import os;import json;print(json.dumps(json.load(open('manager_results.json', 'r')), indent=4));") echo "$BENCHMARK_RESULTS" working-directory: ./core/benchmarks/object-detection - run: make stop-env diff --git a/core/benchmarks/object-detection/benchmark_script.py b/core/benchmarks/object-detection/benchmark_core.py similarity index 52% rename from core/benchmarks/object-detection/benchmark_script.py rename to core/benchmarks/object-detection/benchmark_core.py index 52e39b710..a90ab4c3d 100644 --- a/core/benchmarks/object-detection/benchmark_script.py +++ b/core/benchmarks/object-detection/benchmark_core.py @@ -1,7 +1,6 @@ import io import json import os -import re from base64 import b64decode from dataclasses import dataclass from datetime import datetime @@ -21,17 +20,19 @@ Polygon, Prediction, Raster, - ValorDetectionManager, enums, evaluate_detection, ) from valor_core.enums import AnnotationType -def time_it(fn, *args, **kwargs): - start = time() - results = fn(*args, **kwargs) - return (time() - start, results) +def time_it(fn): + def wrapper(*args, **kwargs): + start = time() + results = fn(*args, **kwargs) + return (time() - start, results) + + return wrapper def download_data_if_not_exists( @@ -62,9 +63,18 @@ def download_data_if_not_exists( else: print(f"{file_name} already exists locally.") + # sort file by datum uid + with open(file_path, "r") as f: + lines = [x for x in f] + with open(file_path, "w") as f: + for line in sorted( + lines, key=lambda x: int(json.loads(x)["datum"]["uid"]) + ): + f.write(line) + def write_results_to_file(write_path: Path, results: list[dict]): - """Write results to results.json""" + """Write results to core_results.json""" current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S") if os.path.isfile(write_path): with open(write_path, "r") as file: @@ -79,6 +89,44 @@ def write_results_to_file(write_path: Path, results: list[dict]): json.dump(data, file, indent=4) +def _create_annotation( + dtype: str, + ann: dict, +): + ann.pop("text") + ann.pop("context_list") + + labels = [] + for label in ann["labels"]: + labels.append(Label(**label)) + ann["labels"] = labels + + if ann["bounding_box"] 
and dtype == AnnotationType.BOX: + ann["bounding_box"] = Box(ann["bounding_box"]) + return Annotation(**ann) + + if ann["polygon"] and dtype == AnnotationType.POLYGON: + ann["polygon"] = Polygon(ann["polygon"]) + return Annotation(**ann) + + if ann["raster"] and dtype == AnnotationType.RASTER: + mask_bytes = b64decode(ann["raster"]["mask"]) + with io.BytesIO(mask_bytes) as f: + img = PIL.Image.open(f) + w, h = img.size + if ann["raster"]["geometry"] is not None: + ann["raster"] = Raster.from_geometry( + ann["raster"]["geometry"], + width=w, + height=h, + ) + elif ann["raster"]["geometry"] is None: + # decode raster + ann["raster"] = Raster(mask=np.array(img)) + return Annotation(**ann) + + +@time_it def ingest_groundtruths( dtype: AnnotationType, path: Path, @@ -93,46 +141,18 @@ def ingest_groundtruths( annotations = [] for ann in gt_dict["annotations"]: - ann.pop("text") - ann.pop("context_list") - - labels = [] - for label in ann["labels"]: - labels.append(Label(**label)) - ann["labels"] = labels - - if ann["bounding_box"] and dtype == AnnotationType.BOX: - ann["bounding_box"] = Box(ann["bounding_box"]) - annotations.append(Annotation(**ann)) - - if ann["polygon"] and dtype == AnnotationType.POLYGON: - ann["polygon"] = Polygon(ann["polygon"]) - annotations.append(Annotation(**ann)) - - if ann["raster"] and dtype == AnnotationType.RASTER: - mask_bytes = b64decode(ann["raster"]["mask"]) - with io.BytesIO(mask_bytes) as f: - img = PIL.Image.open(f) - w, h = img.size - if ann["raster"]["geometry"] is not None: - ann["raster"] = Raster.from_geometry( - ann["raster"]["geometry"], - width=w, - height=h, - ) - elif ann["raster"]["geometry"] is None: - # decode raster - ann["raster"] = Raster(mask=np.array(img)) - annotations.append(Annotation(**ann)) + annotations.append(_create_annotation(dtype=dtype, ann=ann)) gt_dict["annotations"] = annotations gt = GroundTruth(**gt_dict) groundtruths.append(gt) + if len(groundtruths) >= limit: return groundtruths return groundtruths +@time_it def ingest_predictions( dtype: AnnotationType, datum_uids: list[str], @@ -140,59 +160,22 @@ def ingest_predictions( limit: int, ) -> list[Prediction]: - pattern = re.compile(r'"uid":\s*"(\d+)"') - predictions = [] with open(path, "r") as f: count = 0 for line in f: - match = pattern.search(line) - if not match: - continue - elif match.group(1) not in datum_uids: - continue pd_dict = json.loads(line) - pd_dict["datum"].pop("text") pd_dict["datum"] = Datum(**pd_dict["datum"]) annotations = [] for ann in pd_dict["annotations"]: - ann.pop("text") - ann.pop("context_list") - - labels = [] - for label in ann["labels"]: - labels.append(Label(**label)) - ann["labels"] = labels - - if ann["bounding_box"] and dtype == AnnotationType.BOX: - ann["bounding_box"] = Box(ann["bounding_box"]) - annotations.append(Annotation(**ann)) - - if ann["polygon"] and dtype == AnnotationType.POLYGON: - ann["polygon"] = Polygon(ann["polygon"]) - annotations.append(Annotation(**ann)) - - if ann["raster"] and dtype == AnnotationType.RASTER: - mask_bytes = b64decode(ann["raster"]["mask"]) - with io.BytesIO(mask_bytes) as f: - img = PIL.Image.open(f) - w, h = img.size - if ann["raster"]["geometry"] is not None: - ann["raster"] = Raster.from_geometry( - ann["raster"]["geometry"], - width=w, - height=h, - ) - elif ann["raster"]["geometry"] is None: - # decode raster - ann["raster"] = Raster(mask=np.array(img)) - annotations.append(Annotation(**ann)) + annotations.append(_create_annotation(dtype=dtype, ann=ann)) pd_dict["annotations"] = annotations pd = 
Prediction(**pd_dict) predictions.append(pd) + count += 1 if count >= limit: return predictions @@ -242,101 +225,70 @@ def run_detailed_pr_curve_evaluation(groundtruths, predictions): return evaluation -def run_base_evaluation_with_manager(groundtruths, predictions): - """Run a base evaluation (with no PR curves) using ValorDetectionManager.""" - manager = ValorDetectionManager() - manager.add_data(groundtruths=groundtruths, predictions=predictions) - return manager.evaluate() - - -def run_pr_curve_evaluation_with_manager(groundtruths, predictions): - """Run a base evaluation with PrecisionRecallCurve included using ValorDetectionManager.""" - manager = ValorDetectionManager( - metrics_to_return=[ - enums.MetricType.AP, - enums.MetricType.AR, - enums.MetricType.mAP, - enums.MetricType.APAveragedOverIOUs, - enums.MetricType.mAR, - enums.MetricType.mAPAveragedOverIOUs, - enums.MetricType.PrecisionRecallCurve, - ], - ) - - manager.add_data(groundtruths=groundtruths, predictions=predictions) - - return manager.evaluate() - - -def run_detailed_pr_curve_evaluation_with_manager(groundtruths, predictions): - """Run a base evaluation with PrecisionRecallCurve and DetailedPrecisionRecallCurve included using ValorDetectionManager.""" - - manager = ValorDetectionManager( - metrics_to_return=[ - enums.MetricType.AP, - enums.MetricType.AR, - enums.MetricType.mAP, - enums.MetricType.APAveragedOverIOUs, - enums.MetricType.mAR, - enums.MetricType.mAPAveragedOverIOUs, - enums.MetricType.PrecisionRecallCurve, - enums.MetricType.DetailedPrecisionRecallCurve, - ], - ) - - manager.add_data(groundtruths=groundtruths, predictions=predictions) - - return manager.evaluate() - - -@dataclass -class DataBenchmark: - dtype: str - ingestion: float - - def result(self) -> dict[str, float | str]: - return { - "dtype": self.dtype, - "ingestion": round(self.ingestion, 2), - } - - @dataclass -class EvaluationBenchmark: +class Benchmark: limit: int - gt_stats: DataBenchmark - pd_stats: DataBenchmark n_datums: int n_annotations: int n_labels: int + gt_type: AnnotationType + pd_type: AnnotationType + gt_ingest: float + pd_ingest: float eval_base: float eval_base_pr: float eval_base_pr_detail: float - def result(self) -> dict[str, float | str | dict[str, str | float]]: + def result(self) -> dict: return { "limit": self.limit, - "groundtruths": self.gt_stats.result(), - "predictions": self.pd_stats.result(), - "evaluation": { - "number_of_datums": self.n_datums, - "number_of_annotations": self.n_annotations, - "number_of_labels": self.n_labels, - "base": round(self.eval_base, 2), - "base+pr": round(self.eval_base_pr, 2), - "base+pr+detailed": round(self.eval_base_pr_detail, 2), + "n_datums": self.n_datums, + "n_annotations": self.n_annotations, + "n_labels": self.n_labels, + "dtype": { + "groundtruth": self.gt_type.value, + "prediction": self.pd_type.value, + }, + "chunk_size": self.limit, + "base": { + "ingestion": f"{round(self.gt_ingest + self.pd_ingest, 2)} seconds", + "evaluation": { + "preprocessing": "0.0 seconds", + "computation": f"{round(self.eval_base, 2)} seconds", + "total": f"{round(self.eval_base, 2)} seconds", + }, }, + "base+pr": { + "ingestion": f"{round(self.gt_ingest + self.pd_ingest, 2)} seconds", + "evaluation": { + "preprocessing": "0.0 seconds", + "computation": f"{round(self.eval_base_pr, 2)} seconds", + "total": f"{round(self.eval_base_pr, 2)} seconds", + }, + } + if self.eval_base_pr > -1 + else {}, + "base+pr+detailed": { + "ingestion": f"{round(self.gt_ingest + self.pd_ingest, 2)} seconds", + 
"evaluation": { + "preprocessing": "0.0 seconds", + "computation": f"{round(self.eval_base_pr_detail, 2)} seconds", + "total": f"{round(self.eval_base_pr_detail, 2)} seconds", + }, + } + if self.eval_base_pr_detail > -1 + else {}, } def run_benchmarking_analysis( limits_to_test: list[int], combinations: list[tuple[AnnotationType, AnnotationType]] | None = None, - results_file: str = "results.json", - ingestion_chunk_timeout: int = 30, - evaluation_timeout: int = 30, + results_file: str = "core_results.json", compute_pr: bool = True, compute_detailed: bool = True, + ingestion_timeout=30, + evaluation_timeout=30, ): """Time various function calls and export the results.""" current_directory = Path(__file__).parent @@ -393,65 +345,78 @@ def run_benchmarking_analysis( pd_filename = prediction_caches[pd_type] # gt ingestion - gt_ingest_time, groundtruths = time_it( - ingest_groundtruths, + gt_ingest_time, groundtruths = ingest_groundtruths( dtype=gt_type, path=current_directory / Path(gt_filename), limit=limit, ) # pd ingestion - datum_uids = [gt.datum.uid for gt in groundtruths] - pd_ingest_time, predictions = time_it( - ingest_predictions, + datum_uids = [gt.datum.uid for gt in groundtruths] # type: ignore + pd_ingest_time, predictions = ingest_predictions( dtype=pd_type, datum_uids=datum_uids, path=current_directory / Path(pd_filename), limit=limit, ) - # run evaluations - eval_pr = None - eval_detail = None - eval_base = run_base_evaluation_with_manager( - groundtruths, predictions - ) - if compute_pr: - eval_pr = run_pr_curve_evaluation_with_manager( - groundtruths, predictions + if ( + gt_ingest_time + pd_ingest_time > ingestion_timeout # type: ignore + and ingestion_timeout != -1 + ): + raise TimeoutError( + f"Benchmark timed out while attempting to ingest {limit} datums." + ) + + # === Base Evaluation === + base_results = run_base_evaluation(groundtruths, predictions) + assert base_results.meta + n_datums = base_results.meta["datums"] + n_annotations = base_results.meta["annotations"] + n_labels = base_results.meta["labels"] + base = base_results.meta["duration"] + if base > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"Base evaluation timed out with {n_datums} datums." ) + + # === PR Evaluation === + pr = -1 + if compute_pr: + pr_results = run_pr_curve_evaluation(groundtruths, predictions) + assert pr_results.meta + pr = pr_results.meta["duration"] + if pr > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"PR evaluation timed out with {n_datums} datums." + ) + + # === Detailed Evaluation === + detailed = -1 if compute_detailed: - eval_detail = run_detailed_pr_curve_evaluation( + detailed_results = run_detailed_pr_curve_evaluation( groundtruths, predictions ) - - assert eval_base.meta + assert detailed_results.meta + detailed = detailed_results.meta["duration"] + if detailed > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"Detailed evaluation timed out with {n_datums} datums." 
+ ) results.append( - EvaluationBenchmark( + Benchmark( limit=limit, - gt_stats=DataBenchmark( - dtype=gt_type, - ingestion=gt_ingest_time, - ), - pd_stats=DataBenchmark( - dtype=pd_type, - ingestion=pd_ingest_time, - ), - n_datums=eval_base.meta["datums"], - n_annotations=eval_base.meta["annotations"], - n_labels=eval_base.meta["labels"], - eval_base=eval_base.meta["duration"], - eval_base_pr=( - eval_pr.meta["duration"] - if eval_pr and eval_pr.meta - else -1 - ), - eval_base_pr_detail=( - eval_detail.meta["duration"] - if eval_detail and eval_detail.meta - else -1 - ), + n_datums=n_datums, + n_annotations=n_annotations, + n_labels=n_labels, + gt_type=gt_type, + pd_type=pd_type, + gt_ingest=gt_ingest_time, + pd_ingest=pd_ingest_time, + eval_base=base, + eval_base_pr=pr, + eval_base_pr_detail=detailed, ).result() ) diff --git a/core/benchmarks/object-detection/benchmark_manager.py b/core/benchmarks/object-detection/benchmark_manager.py new file mode 100644 index 000000000..a04873be7 --- /dev/null +++ b/core/benchmarks/object-detection/benchmark_manager.py @@ -0,0 +1,528 @@ +import io +import json +import os +from base64 import b64decode +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from time import time + +import numpy as np +import PIL.Image +import requests +from tqdm import tqdm +from valor_core import ( + Annotation, + Box, + Datum, + GroundTruth, + Label, + Polygon, + Prediction, + Raster, +) +from valor_core import ValorDetectionManager as Manager +from valor_core import enums +from valor_core.enums import AnnotationType + + +def time_it(fn): + def wrapper(*args, **kwargs): + start = time() + results = fn(*args, **kwargs) + return (time() - start, results) + + return wrapper + + +def download_data_if_not_exists( + file_name: str, + file_path: Path, + url: str, +): + """Download the data from a public bucket if it doesn't exist locally.""" + + if not os.path.exists(file_path): + response = requests.get(url, stream=True) + if response.status_code == 200: + total_size = int(response.headers.get("content-length", 0)) + with open(file_path, "wb") as f: + with tqdm( + total=total_size, + unit="B", + unit_scale=True, + unit_divisor=1024, + desc=file_name, + ) as pbar: + for chunk in response.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + pbar.update(1024) + else: + raise RuntimeError(response) + else: + print(f"{file_name} already exists locally.") + + # sort file by datum uid + with open(file_path, "r") as f: + lines = [x for x in f] + with open(file_path, "w") as f: + for line in sorted( + lines, key=lambda x: int(json.loads(x)["datum"]["uid"]) + ): + f.write(line) + + +def write_results_to_file(write_path: Path, results: list[dict]): + """Write results to manager_results.json""" + current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + if os.path.isfile(write_path): + with open(write_path, "r") as file: + file.seek(0) + data = json.load(file) + else: + data = {} + + data[current_datetime] = results + + with open(write_path, "w+") as file: + json.dump(data, file, indent=4) + + +def _create_annotation( + dtype: str, + ann: dict, +): + ann.pop("text") + ann.pop("context_list") + + labels = [] + for label in ann["labels"]: + labels.append(Label(**label)) + ann["labels"] = labels + + if ann["bounding_box"] and dtype == AnnotationType.BOX: + ann["bounding_box"] = Box(ann["bounding_box"]) + return Annotation(**ann) + + if ann["polygon"] and dtype == AnnotationType.POLYGON: + ann["polygon"] = Polygon(ann["polygon"]) + 
return Annotation(**ann) + + if ann["raster"] and dtype == AnnotationType.RASTER: + mask_bytes = b64decode(ann["raster"]["mask"]) + with io.BytesIO(mask_bytes) as f: + img = PIL.Image.open(f) + w, h = img.size + if ann["raster"]["geometry"] is not None: + ann["raster"] = Raster.from_geometry( + ann["raster"]["geometry"], + width=w, + height=h, + ) + elif ann["raster"]["geometry"] is None: + # decode raster + ann["raster"] = Raster(mask=np.array(img)) + return Annotation(**ann) + + +@time_it +def ingest_and_preprocess( + manager: Manager, + gt_type: AnnotationType, + pd_type: AnnotationType, + gt_path: Path, + pd_path: Path, + limit: int, + chunk_size: int, +) -> tuple[float, Manager]: + with open(gt_path, "r") as gf: + with open(pd_path, "r") as pf: + + count = 0 + groundtruths = [] + predictions = [] + + accumulated_runtime = 0.0 + + for gline, pline in zip(gf, pf): + + # unpack groundtruth + gt_dict = json.loads(gline) + gt_dict["datum"].pop("text") + gt_dict["datum"] = Datum(**gt_dict["datum"]) + annotations = [ + _create_annotation(dtype=gt_type, ann=ann) + for ann in gt_dict["annotations"] + ] + gt_dict["annotations"] = annotations + gt = GroundTruth(**gt_dict) + groundtruths.append(gt) + + # unpack prediction + pd_dict = json.loads(pline) + pd_dict["datum"].pop("text") + pd_dict["datum"] = Datum(**pd_dict["datum"]) + annotations = [ + _create_annotation(dtype=pd_type, ann=ann) + for ann in pd_dict["annotations"] + ] + pd_dict["annotations"] = annotations + pd = Prediction(**pd_dict) + predictions.append(pd) + + count += 1 + if count >= limit and limit > 0: + break + elif len(groundtruths) < chunk_size or chunk_size == -1: + continue + + runtime, _ = time_it(manager.add_data)( + groundtruths, predictions + ) + accumulated_runtime += runtime + groundtruths = [] + predictions = [] + + if groundtruths: + runtime, _ = time_it(manager.add_data)( + groundtruths, predictions + ) + accumulated_runtime += runtime + return (accumulated_runtime, manager) + + +def run_base_evaluation(manager: Manager): + """Run a base evaluation (with no PR curves) using Manager.""" + return manager.evaluate() + + +def run_pr_curve_evaluation(manager: Manager): + """Run a base evaluation with PrecisionRecallCurve included using Manager.""" + return manager.evaluate() + + +def run_detailed_pr_curve_evaluation(manager: Manager): + """Run a base evaluation with PrecisionRecallCurve and DetailedPrecisionRecallCurve included using Manager.""" + return manager.evaluate() + + +@dataclass +class Benchmark: + limit: int + base_runtime: float + pr_runtime: float + detailed_runtime: float + n_datums: int + n_annotations: int + n_labels: int + gt_type: AnnotationType + pd_type: AnnotationType + chunk_size: int + base_ingest: float + pr_ingest: float + detailed_ingest: float + base_precompute: float + pr_precompute: float + detailed_precompute: float + eval_base: float + eval_base_pr: float + eval_base_pr_detail: float + + def result(self) -> dict: + return { + "limit": self.limit, + "n_datums": self.n_datums, + "n_annotations": self.n_annotations, + "n_labels": self.n_labels, + "dtype": { + "groundtruth": self.gt_type.value, + "prediction": self.pd_type.value, + }, + "chunk_size": self.chunk_size, + "base": { + "ingestion": f"{round(self.base_ingest - self.base_precompute, 2)} seconds", + "evaluation": { + "preprocessing": f"{round(self.base_precompute, 2)} seconds", + "computation": f"{round(self.eval_base, 2)} seconds", + "total": f"{round(self.base_precompute + self.eval_base, 2)} seconds", + }, + }, + "base+pr": { + 
"ingestion": f"{round(self.pr_ingest - self.pr_precompute, 2)} seconds", + "evaluation": { + "preprocessing": f"{round(self.pr_precompute, 2)} seconds", + "computation": f"{round(self.eval_base_pr, 2)} seconds", + "total": f"{round(self.pr_precompute + self.eval_base_pr, 2)} seconds", + }, + } + if self.pr_ingest > -1 + else {}, + "base+pr+detailed": { + "ingestion": f"{round(self.detailed_ingest - self.detailed_precompute, 2)} seconds", + "evaluation": { + "preprocessing": f"{round(self.detailed_precompute, 2)} seconds", + "computation": f"{round(self.eval_base_pr_detail, 2)} seconds", + "total": f"{round(self.detailed_precompute + self.eval_base_pr_detail, 2)} seconds", + }, + } + if self.detailed_ingest > -1 + else {}, + } + + +def run_benchmarking_analysis( + limits_to_test: list[int], + combinations: list[tuple[AnnotationType, AnnotationType]] | None = None, + results_file: str = "manager_results.json", + chunk_size: int = -1, + compute_pr: bool = True, + compute_detailed: bool = True, + ingestion_timeout=30, + evaluation_timeout=30, +): + """Time various function calls and export the results.""" + current_directory = Path(__file__).parent + write_path = current_directory / Path(results_file) + + gt_box_filename = "gt_objdet_coco_bbox.jsonl" + gt_polygon_filename = "gt_objdet_coco_polygon.jsonl" + # gt_multipolygon_filename = "gt_objdet_coco_raster_multipolygon.jsonl" + gt_raster_filename = "gt_objdet_coco_raster_bitmask.jsonl" + pd_box_filename = "pd_objdet_yolo_bbox.jsonl" + pd_polygon_filename = "pd_objdet_yolo_polygon.jsonl" + # pd_multipolygon_filename = "pd_objdet_yolo_multipolygon.jsonl" + pd_raster_filename = "pd_objdet_yolo_raster.jsonl" + + groundtruth_caches = { + AnnotationType.BOX: gt_box_filename, + AnnotationType.POLYGON: gt_polygon_filename, + # AnnotationType.MULTIPOLYGON: gt_multipolygon_filename, + AnnotationType.RASTER: gt_raster_filename, + } + prediction_caches = { + AnnotationType.BOX: pd_box_filename, + AnnotationType.POLYGON: pd_polygon_filename, + # AnnotationType.MULTIPOLYGON: pd_multipolygon_filename, + AnnotationType.RASTER: pd_raster_filename, + } + + # default is to perform all combinations + if combinations is None: + combinations = [ + (gt_type, pd_type) + for gt_type in groundtruth_caches + for pd_type in prediction_caches + ] + + # cache data locally + filenames = [ + *list(groundtruth_caches.values()), + *list(prediction_caches.values()), + ] + for filename in filenames: + file_path = current_directory / Path(filename) + url = f"https://pub-fae71003f78140bdaedf32a7c8d331d2.r2.dev/{filename}" + download_data_if_not_exists( + file_name=filename, file_path=file_path, url=url + ) + + # iterate through datum limits + results = list() + for limit in limits_to_test: + for gt_type, pd_type in combinations: + + gt_filename = groundtruth_caches[gt_type] + pd_filename = prediction_caches[pd_type] + + # === Base Evaluation === + start = time() + base_evaluation = Manager() + + # ingest + preprocess + base_ingest, ( + base_precompute, + base_evaluation, + ) = ingest_and_preprocess( + manager=base_evaluation, + gt_type=gt_type, + pd_type=pd_type, + gt_path=current_directory / Path(gt_filename), + pd_path=current_directory / Path(pd_filename), + limit=limit, + chunk_size=chunk_size, + ) # type: ignore - time_it wrapper + if base_ingest > ingestion_timeout and ingestion_timeout != -1: + raise TimeoutError( + f"Base precomputation timed out with limit of {limit}." 
+ ) + + # evaluate + base_results = run_base_evaluation(base_evaluation) + base_total = time() - start + assert base_results.meta + n_datums = base_results.meta["datums"] + n_annotations = base_results.meta["annotations"] + n_labels = base_results.meta["labels"] + base = base_results.meta["duration"] + if base > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"Base evaluation timed out with {n_datums} datums." + ) + + # === PR Evaluation === + pr_total = -1 + pr_ingest = -1 + pr_precompute = -1 + pr = -1 + if compute_pr: + start = time() + pr_evaluation = Manager( + metrics_to_return=[ + enums.MetricType.AP, + enums.MetricType.AR, + enums.MetricType.mAP, + enums.MetricType.APAveragedOverIOUs, + enums.MetricType.mAR, + enums.MetricType.mAPAveragedOverIOUs, + enums.MetricType.PrecisionRecallCurve, + ] + ) + + # ingest + preprocess + pr_ingest, ( + pr_precompute, + pr_evaluation, + ) = ingest_and_preprocess( + manager=pr_evaluation, + gt_type=gt_type, + pd_type=pd_type, + gt_path=current_directory / Path(gt_filename), + pd_path=current_directory / Path(pd_filename), + limit=limit, + chunk_size=chunk_size, + ) # type: ignore - time_it wrapper + if pr_ingest > ingestion_timeout and ingestion_timeout != -1: + raise TimeoutError( + f"PR precomputation timed out with {n_datums} datums." + ) + + # evaluate + pr_results = run_pr_curve_evaluation(pr_evaluation) + pr_total = time() - start + assert pr_results.meta + pr = pr_results.meta["duration"] + if pr > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"PR evaluation timed out with {n_datums} datums." + ) + + # === Detailed Evaluation === + detailed_total = -1 + detailed_ingest = -1 + detailed_precompute = -1 + detailed = -1 + if compute_detailed: + start = time() + detailed_evaluation = Manager( + metrics_to_return=[ + enums.MetricType.AP, + enums.MetricType.AR, + enums.MetricType.mAP, + enums.MetricType.APAveragedOverIOUs, + enums.MetricType.mAR, + enums.MetricType.mAPAveragedOverIOUs, + enums.MetricType.PrecisionRecallCurve, + enums.MetricType.DetailedPrecisionRecallCurve, + ] + ) + + # ingest + preprocess + detailed_ingest, ( + detailed_precompute, + detailed_evaluation, + ) = ingest_and_preprocess( + manager=detailed_evaluation, + gt_type=gt_type, + pd_type=pd_type, + gt_path=current_directory / Path(gt_filename), + pd_path=current_directory / Path(pd_filename), + limit=limit, + chunk_size=chunk_size, + ) # type: ignore - time_it wrapper + if ( + detailed_ingest > ingestion_timeout + and ingestion_timeout != -1 + ): + raise TimeoutError( + f"Detailed precomputation timed out with {n_datums} datums." + ) + + # evaluate + detailed_results = run_detailed_pr_curve_evaluation( + detailed_evaluation + ) + detailed_total = time() - start + + assert detailed_results.meta + detailed = detailed_results.meta["duration"] + if detailed > evaluation_timeout and evaluation_timeout != -1: + raise TimeoutError( + f"Detailed evaluation timed out with {n_datums} datums." 
+ ) + + results.append( + Benchmark( + limit=limit, + base_runtime=base_total, + pr_runtime=pr_total, + detailed_runtime=detailed_total, + n_datums=n_datums, + n_annotations=n_annotations, + n_labels=n_labels, + gt_type=gt_type, + pd_type=pd_type, + chunk_size=chunk_size, + base_ingest=base_ingest, + pr_ingest=pr_ingest, + detailed_ingest=detailed_ingest, + base_precompute=base_precompute, + pr_precompute=pr_precompute, + detailed_precompute=detailed_precompute, + eval_base=base, + eval_base_pr=pr, + eval_base_pr_detail=detailed, + ).result() + ) + + write_results_to_file(write_path=write_path, results=results) + + +if __name__ == "__main__": + + # run bounding box benchmark + run_benchmarking_analysis( + combinations=[ + (AnnotationType.BOX, AnnotationType.BOX), + ], + limits_to_test=[5000, 5000], + compute_detailed=False, + ) + + # run polygon benchmark + run_benchmarking_analysis( + combinations=[ + (AnnotationType.POLYGON, AnnotationType.POLYGON), + ], + limits_to_test=[5000, 5000], + compute_detailed=False, + ) + + # run raster benchmark + run_benchmarking_analysis( + combinations=[ + (AnnotationType.RASTER, AnnotationType.RASTER), + ], + limits_to_test=[500, 500], + compute_detailed=False, + )
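
A note on the timing convention used throughout both scripts: `time_it` is a decorator that wraps a function and returns a `(duration, result)` tuple. Because `ingest_and_preprocess` is itself decorated and also returns its own `(accumulated_runtime, manager)` tuple, call sites in benchmark_manager.py unpack two nested tuples at once. A minimal sketch of the pattern, where `slow_add` is a hypothetical stand-in for the decorated functions above:

    from time import time

    def time_it(fn):
        def wrapper(*args, **kwargs):
            start = time()
            results = fn(*args, **kwargs)
            return (time() - start, results)
        return wrapper

    @time_it
    def slow_add(a, b):
        # hypothetical example; stands in for ingest_groundtruths etc.
        return a + b

    duration, total = slow_add(2, 3)
    # duration: seconds elapsed inside slow_add; total: 5

This is why benchmark_manager.py unpacks `base_ingest, (base_precompute, base_evaluation) = ingest_and_preprocess(...)`: the outer float is total wall time inside the function, while the inner float accumulates only the time spent in `manager.add_data`. Their difference is what `result()` reports as "ingestion" versus "preprocessing" -- the previously unaccounted time this patch surfaces.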
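
The `chunk_size` parameter controls how often `ingest_and_preprocess` flushes its buffers into the manager: with the default of -1 everything accumulates and `add_data` runs once after the loop, while a positive value flushes whenever the buffer reaches that size, with a final flush for any partial chunk. A reduced sketch of that buffering behavior (ignoring `limit`), using plain integers in place of (GroundTruth, Prediction) pairs:

    def chunked(items, chunk_size=-1):
        # mirrors the flush logic in ingest_and_preprocess
        buffer = []
        for item in items:
            buffer.append(item)
            if chunk_size == -1 or len(buffer) < chunk_size:
                continue
            yield buffer  # flush a full chunk (manager.add_data in the script)
            buffer = []
        if buffer:
            yield buffer  # flush the final, possibly partial, chunk

    assert list(chunked(range(5), chunk_size=2)) == [[0, 1], [2, 3], [4]]
    assert list(chunked(range(5))) == [[0, 1, 2, 3, 4]]  # chunk_size=-1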