@@ -24,24 +24,19 @@ def _loss_value(labels):
24
24
Nx = len (labels )
25
25
26
26
# The best AUC represents the entire area under the perfect curve, which is
27
- # the total area Nx * Ny, minus the area above the perfect curve (which is
28
- # the sum of a series with a formula (Ny * Ny) / 2) plus 0.5 to account for
29
- # the boundary.
30
- best_auc = Nx * Ny - (((Ny * Ny ) / 2 ) + 0.5 )
27
+ # the total area Nx * Ny, minus the area above the perfect curve.
28
+ best_auc = Nx * Ny - ((Ny * (Ny - 1 )) / 2 )
31
29
32
- # Compute recall values (y) based on the provided labels. We don't need x
33
- # values because the points are uniformly spaced.
30
+ # Compute recall values (y) based on the provided labels.
34
31
y = np .array (_recall_values (labels , x_absolute = True , y_absolute = True )[1 ])
35
32
36
- # The actual AUC is calculated by approximating the area under the curve
37
- # using the trapezoidal rule. (y[1:] + y[:-1]) / 2 takes the average height
38
- # between consecutive y values, and we sum them up.
39
- actual_auc = np .sum ((y [1 :] + y [:- 1 ]) / 2 )
33
+ # The actual AUC is the sum of the recall values along the curve.
34
+ actual_auc = np .sum (y )
40
35
41
36
# The worst AUC represents the area under the worst-case step curve, which
42
- # is simply the area under the recall curve where all positive labels are
43
- # clumped at the end, calculated as (Ny * Ny) / 2.
44
- worst_auc = (( Ny * Ny ) / 2 )
37
+ # is the area under the recall curve where all positive labels are clumped
38
+ # at the end.
39
+ worst_auc = (Ny * ( Ny + 1 )) / 2
45
40
46
41
# The normalized loss is the difference between the best AUC and the actual
47
42
# AUC, normalized by the range between the best and worst AUCs.
0 commit comments