Add more tests for stable_random_distribution

Donaim · Donaim · commit 4fb84516b146 · 2025-02-13T18:09:04.000-08:00
diff --git a/micall/tests/test_stable_random_distribution.py b/micall/tests/test_stable_random_distribution.py
@@ -1,5 +1,6 @@
 
 from micall.utils.stable_random_distribution import stable_random_distribution
+import numpy as np
 
 
 def test_indices_in_range():
@@ -12,3 +13,76 @@ def test_indices_in_range():
     for _ in range(1000):
         idx = next(gen)
         assert 0 <= idx < maximum, f"Index {idx} out of range [0,{maximum})"
+
+
+def test_deterministic_output_with_seed():
+    """
+    Test that two independent generators created with the same
+    maximum and the same seed yield identical sequences.
+    """
+
+    maximum = 15
+    seed = 456
+    gen1 = stable_random_distribution(maximum, seed=seed)
+    gen2 = stable_random_distribution(maximum, seed=seed)
+
+    # Draw 50 values from each generator; the two lists must match.
+    values1 = [next(gen1) for _ in range(50)]
+    values2 = [next(gen2) for _ in range(50)]
+    assert values1 == values2, \
+        "Generators with the same seed produced different outputs."
+
+
+def test_different_seeds_differ():
+    """
+    Sanity check: seeds 789 and 987 should yield different sequences.
+    """
+
+    maximum = 15
+    gen1 = stable_random_distribution(maximum, seed=789)
+    gen2 = stable_random_distribution(maximum, seed=987)
+
+    # Compare the first 50 generated values. Different seeds do not
+    # guarantee different sequences, but a full match over 50 draws
+    # from 15 indices is extremely unlikely.
+    values1 = [next(gen1) for _ in range(50)]
+    values2 = [next(gen2) for _ in range(50)]
+
+    assert values1 != values2, \
+        "Generators with different seeds produced identical sequences."
+
+
+def test_fair_distribution_behavior():
+    """
+    Compare sorted-sample gaps of stable_random_distribution against a
+    uniform NumPy generator, for seeds 0 through 19.
+
+    Steps, per seed:
+      - Draw num_samples indices from our generator and sort them.
+      - Take the mean of the squared gaps between neighbouring values.
+      - Do the same for uniform draws over the same range.
+      - Require our mean to be at least as large as the uniform one.
+    """
+
+    maximum = 1_000
+    num_samples = 10_000
+    for seed in range(20):
+        # Mean squared gap between neighbouring sorted samples (ours).
+        gen = stable_random_distribution(maximum, seed=seed)
+        samples = np.array([next(gen) for _ in range(num_samples)])
+        diff_stable = np.abs(np.diff(np.sort(samples))) ** 2
+        avg_diff_stable = diff_stable.mean()
+
+        # Same statistic for indices drawn uniformly by NumPy's Generator.
+        rng = np.random.default_rng(seed)
+        uniform_samples = rng.choice(maximum, size=num_samples)
+        diff_uniform = np.abs(np.diff(np.sort(uniform_samples))) ** 2
+        avg_diff_uniform = diff_uniform.mean()
+
+        # Expectation: our generator's mean squared gap is at least the
+        # uniform one. NOTE: the check is a plain >= with no tolerance,
+        # so a single seed where uniform wins fails the test.
+        assert avg_diff_stable >= avg_diff_uniform, (
+            f"Expected stable generator to have a higher average jump than a uniform generator: "
+            f"stable {avg_diff_stable} vs uniform {avg_diff_uniform}"
+        )
diff --git a/micall/utils/stable_random_distribution.py b/micall/utils/stable_random_distribution.py
@@ -12,14 +12,13 @@ def stable_random_distribution(maximum: int, seed: int = 42) -> Iterator[int]:
     rng = random.Random(seed)
 
     population = np.arange(n)
-    forward = np.arange(1, n + 1) ** 0.5
+    forward = np.arange(1, n + 1)
     backwards = np.copy(np.flip(forward))
-    np_weights = np.zeros(n) + 0.1
+    np_weights = np.zeros(n) + 1
 
     while True:
-        weights: Sequence[float] = 1 - np_weights  # type: ignore
-        indexes = rng.choices(population=population, weights=weights)
-        index = indexes[0]
+        weights: Sequence[float] = np_weights  # type: ignore
+        index = rng.choices(population=population, weights=weights)[0]
         yield index
 
         if index == 0:
@@ -28,4 +27,5 @@ def stable_random_distribution(maximum: int, seed: int = 42) -> Iterator[int]:
             np_weights[:(index + 1)] += forward[-(index + 1):]
             np_weights[(index + 1):] += backwards[1:-index]
 
-        np_weights /= np_weights.sum()
+        np_weights -= np_weights.min()
+        np_weights = (1 + np_weights.max()) - np_weights