From 71c6bdd7d748b4ba9a54cfd6831016010cae75a0 Mon Sep 17 00:00:00 2001
From: jteijema
Date: Wed, 30 Oct 2024 14:24:32 +0100
Subject: [PATCH] Update algorithm

---
 asreviewcontrib/insights/algorithms.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/asreviewcontrib/insights/algorithms.py b/asreviewcontrib/insights/algorithms.py
index dba1096..e09ed01 100644
--- a/asreviewcontrib/insights/algorithms.py
+++ b/asreviewcontrib/insights/algorithms.py
@@ -24,24 +24,19 @@ def _loss_value(labels):
     Nx = len(labels)
 
     # The best AUC represents the entire area under the perfect curve, which is
-    # the total area Nx * Ny, minus the area above the perfect curve (which is
-    # the sum of a series with a formula (Ny * Ny) / 2) plus 0.5 to account for
-    # the boundary.
-    best_auc = Nx * Ny - (((Ny * Ny) / 2) + 0.5)
+    # the total area Nx * Ny, minus the area above the perfect curve.
+    best_auc = Nx * Ny - ((Ny * (Ny - 1)) / 2)
 
-    # Compute recall values (y) based on the provided labels. We don't need x
-    # values because the points are uniformly spaced.
+    # Compute recall values (y) based on the provided labels.
     y = np.array(_recall_values(labels, x_absolute=True, y_absolute=True)[1])
 
-    # The actual AUC is calculated by approximating the area under the curve
-    # using the trapezoidal rule. (y[1:] + y[:-1]) / 2 takes the average height
-    # between consecutive y values, and we sum them up.
-    actual_auc = np.sum((y[1:] + y[:-1]) / 2)
+    # The actual AUC is the sum of the recall curve.
+    actual_auc = np.sum(y)
 
     # The worst AUC represents the area under the worst-case step curve, which
-    # is simply the area under the recall curve where all positive labels are
-    # clumped at the end, calculated as (Ny * Ny) / 2.
-    worst_auc = ((Ny * Ny) / 2)
+    # is the area under the recall curve where all positive labels are clumped
+    # at the end.
+    worst_auc = (Ny * (Ny + 1)) / 2
 
     # The normalized loss is the difference between the best AUC and the actual
     # AUC, normalized by the range between the best and worst AUCs.