Skip to content

Commit d3d8bcb

Browse files
committed
Use a faster Bradley-Terry implementation
1 parent 950a018 commit d3d8bcb

File tree

5 files changed

+73
-60
lines changed

5 files changed

+73
-60
lines changed

python/evalica/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def _make_matrix(
5757
win_matrix: npt.NDArray[np.float64],
5858
tie_matrix: npt.NDArray[np.float64],
5959
win_weight: float = 1.,
60-
tie_weight: float = 0.,
60+
tie_weight: float = .5,
6161
nan: float = 0.0,
6262
) -> npt.NDArray[np.float64]:
6363
with np.errstate(all="ignore"):
@@ -391,6 +391,11 @@ def bradley_terry(
391391
The Method of Paired Comparisons. Biometrika. 39, 324–345 (1952).
392392
<https://doi.org/10.2307/2334029>.
393393
394+
Quote:
395+
Newman, M.E.J.: Efficient Computation of Rankings from Pairwise Comparisons.
396+
Journal of Machine Learning Research. 24, 1–25 (2023).
397+
<https://www.jmlr.org/papers/v24/22-1086.html>.
398+
394399
Args:
395400
xs: The left-hand side elements.
396401
ys: The right-hand side elements.

python/evalica/naive.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
def pairwise_scores(scores: npt.NDArray[np.number[Any]]) -> npt.NDArray[np.float64]:
16-
if not scores.shape[0]:
16+
if not scores.size:
1717
return np.zeros((0, 0))
1818

1919
return np.nan_to_num(scores[:, np.newaxis] / (scores + scores[:, np.newaxis]))
@@ -63,31 +63,30 @@ def bradley_terry(
6363
tolerance: float = 1e-6,
6464
limit: int = 100,
6565
) -> tuple[npt.NDArray[np.float64], int]:
66-
with np.errstate(all="ignore"):
67-
totals = matrix.T + matrix
68-
69-
active = totals > 0
66+
scores = np.ones(matrix.shape[0])
7067

71-
wins = matrix.sum(axis=1)
68+
converged, iterations = False, 0
7269

73-
normalized = np.zeros_like(matrix, dtype=float)
70+
if not matrix.size:
71+
return scores, iterations
7472

75-
scores = np.ones(matrix.shape[0])
7673
scores_new = scores.copy()
7774

78-
converged, iterations = False, 0
79-
8075
while not converged and iterations < limit:
8176
iterations += 1
8277

8378
with np.errstate(all="ignore"):
84-
sums = np.add.outer(scores, scores)
79+
for i in range(matrix.shape[0]):
80+
sums = scores_new[i] + scores_new
81+
82+
numerator = np.sum(matrix[i] * scores_new / sums)
83+
denominator = np.sum(matrix[:, i] / sums)
8584

86-
normalized[active] = totals[active] / sums[active]
85+
scores_new[i] = numerator / denominator
8786

88-
scores_new[:] = wins
89-
scores_new /= normalized.sum(axis=0)
90-
scores_new /= scores_new.sum()
87+
geometric_mean = np.exp(np.mean(np.log(scores_new)))
88+
89+
scores_new /= geometric_mean
9190

9291
scores_new[:] = np.nan_to_num(scores_new, nan=tolerance)
9392

@@ -105,14 +104,18 @@ def newman(
105104
tolerance: float = 1e-6,
106105
limit: int = 100,
107106
) -> tuple[npt.NDArray[np.float64], float, int]:
108-
win_tie_half = win_matrix + tie_matrix / 2
107+
win_tie_half = np.nan_to_num(win_matrix + tie_matrix / 2, nan=tolerance)
109108

110109
scores = np.ones(win_matrix.shape[0])
111-
scores_new = scores.copy()
112-
v_new = v
113110

114111
converged, iterations = False, 0
115112

113+
if not win_matrix.size and not tie_matrix.size:
114+
return scores, v, iterations
115+
116+
scores_new = scores.copy()
117+
v_new = v
118+
116119
while not converged and iterations < limit:
117120
iterations += 1
118121

@@ -199,7 +202,7 @@ def eigen(
199202
tolerance: float = 1e-6,
200203
limit: int = 100,
201204
) -> tuple[npt.NDArray[np.float64], int]:
202-
if not matrix.shape[0]:
205+
if not matrix.size:
203206
return np.zeros(0, dtype=np.float64), 0
204207

205208
n = matrix.shape[0]
@@ -227,7 +230,7 @@ def pagerank_matrix(
227230
matrix: npt.NDArray[np.float64],
228231
damping: float,
229232
) -> npt.NDArray[np.float64]:
230-
if not matrix.shape[0]:
233+
if not matrix.size:
231234
return np.zeros(0, dtype=np.float64)
232235

233236
p = 1. / int(matrix.shape[0])

python/evalica/test_evalica.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def test_bradley_terry(comparison: Comparison, win_weight: float, tie_weight: fl
188188
assert len(result.scores) == len(set(xs) | set(ys))
189189
assert np.isfinite(result.scores).all()
190190
assert result.scores.is_monotonic_decreasing
191-
assert result.iterations > 0
191+
assert not xs or result.iterations > 0
192192
assert result.limit > 0
193193

194194
assert_series_equal(result_pyo3.scores, result_naive.scores, rtol=1e-3, check_like=True)
@@ -216,8 +216,8 @@ def test_newman(comparison: Comparison, v_init: float) -> None:
216216
assert len(result.scores) == len(set(xs) | set(ys))
217217
assert np.isfinite(result.scores).all()
218218
assert result.scores.is_monotonic_decreasing
219-
assert np.isfinite(result.v)
220-
assert result.iterations > 0
219+
assert not xs or np.isfinite(result.v)
220+
assert not xs or result.iterations > 0
221221
assert result.limit > 0
222222

223223
if np.isfinite(v_init):
@@ -226,7 +226,8 @@ def test_newman(comparison: Comparison, v_init: float) -> None:
226226
assert result.v_init is v_init
227227

228228
assert_series_equal(result_pyo3.scores, result_naive.scores, check_like=True)
229-
assert result_pyo3.v == pytest.approx(result_naive.v)
229+
230+
assert not np.isfinite(v_init) or result_pyo3.v == pytest.approx(result_naive.v)
230231

231232

232233
@given(
@@ -445,9 +446,12 @@ def test_bradley_terry_dataset(comparison: Comparison, comparison_golden: pd.Ser
445446
result_pyo3 = evalica.bradley_terry(xs, ys, winners, weights=weights, solver="pyo3")
446447
result_naive = evalica.bradley_terry(xs, ys, winners, weights=weights, solver="naive")
447448

448-
assert_series_equal(result_naive.scores, comparison_golden, rtol=1e-4, check_like=True)
449-
assert_series_equal(result_pyo3.scores, comparison_golden, rtol=1e-4, check_like=True)
450-
assert_series_equal(result_pyo3.scores, result_naive.scores, check_like=True)
449+
scores_pyo3 = result_pyo3.scores / result_pyo3.scores.sum()
450+
scores_naive = result_naive.scores / result_naive.scores.sum()
451+
452+
assert_series_equal(scores_naive, comparison_golden, rtol=1e-4, check_like=True)
453+
assert_series_equal(scores_pyo3, comparison_golden, rtol=1e-4, check_like=True)
454+
assert_series_equal(scores_pyo3, scores_naive, check_like=True)
451455

452456

453457
@pytest.mark.parametrize(("algorithm", "dataset"), [

src/bradley_terry.rs

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use std::ops::{AddAssign, DivAssign};
22

3-
use ndarray::{Array1, Array2, ArrayView2, Axis, ErrorKind, ScalarOperand, ShapeError};
4-
use num_traits::Float;
3+
use ndarray::{Array1, ArrayView2, Axis, ErrorKind, ScalarOperand, ShapeError};
4+
use num_traits::{Float, FromPrimitive};
55

6-
use crate::utils::{nan_to_num, one_nan_to_num};
6+
use crate::utils::{nan_to_num, one_nan_to_num, win_plus_tie_matrix};
77

8-
pub fn bradley_terry<A: Float + ScalarOperand + AddAssign + DivAssign>(
8+
pub fn bradley_terry<A: Float + FromPrimitive + ScalarOperand + AddAssign + DivAssign>(
99
matrix: &ArrayView2<A>,
1010
tolerance: A,
1111
limit: usize,
@@ -14,41 +14,35 @@ pub fn bradley_terry<A: Float + ScalarOperand + AddAssign + DivAssign>(
1414
return Err(ShapeError::from_kind(ErrorKind::IncompatibleShape));
1515
}
1616

17-
let totals = &matrix.t().clone() + matrix;
18-
19-
let active = totals
20-
.indexed_iter()
21-
.filter(|((_, _), &total)| total > A::zero())
22-
.collect::<Vec<((usize, usize), &A)>>();
23-
24-
let wins = matrix.sum_axis(Axis(1));
25-
26-
let mut normalized = Array2::zeros(matrix.raw_dim());
27-
2817
let mut scores = Array1::ones(matrix.shape()[0]);
2918

3019
let mut converged = false;
3120
let mut iterations = 0;
3221

22+
if matrix.is_empty() {
23+
return Ok((scores, iterations));
24+
}
25+
3326
while !converged && iterations < limit {
3427
iterations += 1;
3528

36-
for ((i, j), &v) in active.iter() {
37-
let i = *i;
38-
let j = *j;
29+
let mut scores_new = scores.clone();
3930

40-
normalized[[i, j]] = v / (scores[i] + scores[j]);
41-
}
31+
for i in 0..matrix.nrows() {
32+
let mut numerator = A::zero();
33+
let mut denominator = A::zero();
4234

43-
let mut scores_new = &wins / &normalized.sum_axis(Axis(0));
35+
for j in 0..matrix.ncols() {
36+
let sum_scores = scores_new[i] + scores_new[j];
37+
numerator += matrix[[i, j]] * scores_new[j] / sum_scores;
38+
denominator += matrix[[j, i]] / sum_scores;
39+
}
4440

45-
// Otherwise the result is different from what is computed by NumPy
46-
let mut scores_new_sum = A::zero();
47-
for &score in scores_new.iter() {
48-
scores_new_sum += score;
41+
scores_new[i] = numerator / denominator;
4942
}
5043

51-
scores_new /= scores_new_sum;
44+
let geometric_mean = scores_new.mapv(|x| x.ln()).mean().unwrap().exp();
45+
scores_new /= geometric_mean;
5246

5347
nan_to_num(&mut scores_new, tolerance);
5448

@@ -72,15 +66,20 @@ pub fn newman(
7266
return Err(ShapeError::from_kind(ErrorKind::IncompatibleShape));
7367
}
7468

75-
let win_tie_half = win_matrix + &(tie_matrix / 2.0);
76-
7769
let mut scores = Array1::<f64>::ones(win_matrix.shape()[0]);
7870
let mut v = v_init;
79-
let mut v_new = v;
8071

8172
let mut converged = false;
8273
let mut iterations = 0;
8374

75+
if win_matrix.is_empty() && tie_matrix.is_empty() {
76+
return Ok((scores, v, iterations));
77+
}
78+
79+
let win_tie_half = win_plus_tie_matrix(&win_matrix, &tie_matrix, 1.0, 0.5, tolerance);
80+
81+
let mut v_new = v;
82+
8483
while !converged && iterations < limit {
8584
iterations += 1;
8685

@@ -156,7 +155,9 @@ mod tests {
156155
assert_eq!(actual.len(), matrix.shape()[0]);
157156
assert_ne!(iterations, 0);
158157

159-
for (left, right) in actual.iter().zip(expected.iter()) {
158+
let actual_normalized = actual.clone() / actual.sum();
159+
160+
for (left, right) in actual_normalized.iter().zip(expected.iter()) {
160161
assert_abs_diff_eq!(left, right, epsilon = tolerance * 1e1);
161162
}
162163
}
@@ -191,7 +192,7 @@ mod tests {
191192
tolerance,
192193
100,
193194
)
194-
.unwrap();
195+
.unwrap();
195196

196197
assert_eq!(actual.len(), win_matrix.shape()[0]);
197198
assert_eq!(actual.len(), tie_matrix.shape()[0]);

src/elo.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ mod tests {
8585
1.0,
8686
0.5,
8787
)
88-
.unwrap();
88+
.unwrap();
8989

9090
for (a, b) in actual.iter().zip(expected.iter()) {
9191
assert!((a - b).abs() < 1e-0, "a = {}, b = {}", a, b);

0 commit comments

Comments
 (0)