Use Python's random instead of NumPy's random in stable_random_distribution

Donaim · Donaim · commit 87b47d421ffb · 2025-02-14T16:55:12.000-08:00
diff --git a/micall/tests/test_stable_random_distribution.py b/micall/tests/test_stable_random_distribution.py
@@ -3,13 +3,15 @@
 import numpy as np
 from itertools import islice
 from typing import Set
+import random
 
 
 def test_indices_in_range():
     """Test that each index generated is within the range [0, high)."""
 
     high = 10
-    gen = stable_random_distribution(high, seed=123)
+    rng = random.Random(123)
+    gen = stable_random_distribution(high, rng=rng)
     # Grab a bunch of values from the infinite generator
 
     for _ in range(1000):
@@ -21,7 +23,8 @@ def test_bounds_are_reachable():
     """Test that both min and max-1 can be generated."""
 
     high = 999
-    gen = stable_random_distribution(high, seed=123)
+    rng = random.Random(123456)
+    gen = stable_random_distribution(high, rng=rng)
     lst = islice(gen, 1000)
 
     assert 0 in lst
@@ -32,13 +35,8 @@ def test_everything_is_reachable():
     """Test that all numbers in the range [0, max-1) can be generated."""
 
     high = 30
-    fun = stable_random_distribution
-    # def fun(high, seed):
-    #     import random
-    #     while True:
-    #         yield random.randint(0, high)
-
-    gen = fun(high, seed=123)
+    rng = random.Random(123)
+    gen = stable_random_distribution(high, rng=rng)
     lst = tuple(map(int, islice(gen, 1000)))
 
     for x in range(high):
@@ -53,8 +51,10 @@ def test_deterministic_output_with_seed():
 
     high = 15
     seed = 456
-    gen1 = stable_random_distribution(high, seed=seed)
-    gen2 = stable_random_distribution(high, seed=seed)
+    rng1 = random.Random(seed)
+    rng2 = random.Random(seed)
+    gen1 = stable_random_distribution(high, rng=rng1)
+    gen2 = stable_random_distribution(high, rng=rng2)
 
     # Compare the first 50 generated values.
     values1 = [next(gen1) for _ in range(50)]
@@ -69,8 +69,10 @@ def test_different_seeds_differ():
     """
 
     high = 15
-    gen1 = stable_random_distribution(high, seed=789)
-    gen2 = stable_random_distribution(high, seed=987)
+    rng1 = random.Random(789)
+    rng2 = random.Random(987)
+    gen1 = stable_random_distribution(high, rng=rng1)
+    gen2 = stable_random_distribution(high, rng=rng2)
 
     # Compare the first 50 generated values: while not guaranteed to
     # be different, it is extremely unlikely that the two sequences
@@ -98,14 +100,15 @@ def test_fair_distribution_behavior():
     num_samples = 3_000
     for seed in range(100):
         # Gather samples from our generator.
-        gen = stable_random_distribution(high, seed=seed)
+        rng = random.Random(seed)
+        gen = stable_random_distribution(high, rng=rng)
         samples = np.array([next(gen) for _ in range(num_samples)])
         diff_stable = np.abs(np.diff(np.sort(samples))) ** 2
         avg_diff_stable = diff_stable.mean()
 
         # For comparison, generate num_samples indices uniformly at random.
-        rng = np.random.default_rng(seed)
-        uniform_samples = rng.choice(high, size=num_samples)
+        nprng = np.random.default_rng(seed)
+        uniform_samples = nprng.choice(high, size=num_samples)
         diff_uniform = np.abs(np.diff(np.sort(uniform_samples))) ** 2
         avg_diff_uniform = diff_uniform.mean()
 
@@ -132,22 +135,23 @@ def test_fill_domain_speed():
 
     for seed in range(trials):
         # Gather samples from our generator.
-        gen = stable_random_distribution(high, seed=seed)
+        rng = random.Random(seed)
+        gen = stable_random_distribution(high, rng=rng)
         stable_bucket: Set[int] = set()
         stable_steps = 0
         while len(stable_bucket) < high:
             stable_bucket.add(next(gen))
             stable_steps += 1
 
         # For comparison, generate num_samples indices uniformly at random.
-        rng = np.random.default_rng(seed)
+        nprng = np.random.default_rng(seed)
         uniform_bucket: Set[int] = set()
         uniform_steps = 0
         while len(uniform_bucket) < high:
-            uniform_bucket.add(rng.integers(0, high))
+            uniform_bucket.add(nprng.integers(0, high))
             uniform_steps += 1
 
         if stable_steps < uniform_steps:
             wins += 1
 
-    assert wins / trials > 0.85
+    assert wins / trials > 0.80
diff --git a/micall/utils/stable_random_distribution.py b/micall/utils/stable_random_distribution.py
@@ -1,21 +1,29 @@
-from typing import Iterator
+from typing import Iterator, Optional
 
+import random
 import numpy as np
 
-DUPLICATION_FACTOR = 1
+DUPLICATION_FACTOR = 100
 
 
-def stable_random_distribution(high: int, seed: int = 42) -> Iterator[int]:
+def stable_random_distribution(high: int,
+                               rng: Optional[random.Random] = None,
+                               ) -> Iterator[int]:
+
     if high <= 0:
         return
 
-    rng = np.random.default_rng(seed)
+    if rng is None:
+        rng = random.Random()
+
+    maximum = high - 1
     block = np.arange(high)
     population = np.concatenate([block] * DUPLICATION_FACTOR, axis=0)
 
     assert len(population) == DUPLICATION_FACTOR * len(block)
 
     while True:
-        index = rng.choice(population)
+        choice = rng.randint(0, maximum)
+        index = population[choice]
         yield index
-        population[index] = rng.integers(low=0, high=high)
+        population[index] = rng.randint(0, maximum)