
Commit c59ceee

Imitation Dynamics and Regret Minimization models added to the repo
1 parent 335a8ed commit c59ceee

File tree: 5 files changed (+305, -1 lines)

Diff for: src/nashpy/algorithms/regret_minimization.py

+85
@@ -0,0 +1,85 @@
import numpy as np
from typing import Generator, Tuple, Any
import numpy.typing as npt


def compute_regrets(strategy_utilities, current_strategy):
    """
    Calculate the regrets for a player from the strategy utilities and the current strategy.
    Regrets represent the difference between the utility achieved by playing a strategy and the
    maximum utility that could have been achieved by playing any strategy.
    In this implementation only positive regrets are kept.
    """
    regrets = np.maximum(0, strategy_utilities - current_strategy)
    return regrets


def update_strategy(current_strategy, regrets, learning_rate):
    """
    Update the player's strategy from the regrets, the current strategy, and a fixed learning rate.
    The regrets are scaled by the learning rate and added to the current strategy.
    The updated strategy is then normalised so that the probabilities sum to 1.
    """
    updated_strategy = current_strategy + learning_rate * regrets
    return updated_strategy / np.sum(updated_strategy)


def generate_abs_strategy(strategy_list):
    """
    Return the most favourable strategy for a player: uniform probability over the entries with
    the maximum probability value and zero elsewhere.
    """
    max_probability = max(strategy_list)
    strategy_relative = [1 if x == max_probability else 0 for x in strategy_list]
    sum_value_in_the_list = sum(strategy_relative)
    favorable_strategy = [x / sum_value_in_the_list for x in strategy_relative]
    return favorable_strategy


def regret_minimization(
    A: npt.NDArray, B: npt.NDArray, learning_rate=0.1, max_iterations=100
) -> Generator[Tuple[float, float], Any, None]:
    """
    Obtain the Nash equilibria using the regret minimization method over a fixed number of
    iterations. The implementation is based on regret matching with a fixed learning rate.

    The algorithm implemented here is Algorithm 4.3 (Theorem 4.4) of [Nisan2007]_

    1. Build the best strategy probabilities of both players.

    Parameters
    ----------
    A : array
        The row player utility matrix.
    B : array
        The column player utility matrix.
    learning_rate : float (optional, defaults to 0.1)
        The learning rate determines the magnitude of the update towards the regrets.
        The regrets are scaled by the learning rate before they are added to the current strategy.
        A higher learning rate results in a larger update, while a lower learning rate leads to a
        smaller update. This value controls the pace of convergence towards a Nash equilibrium.
    max_iterations : int (optional, defaults to 100)
        The number of iterations; this can be increased or decreased depending on the shape of
        the utility/payoff matrices.

    Yields
    ------
    Generator
        The equilibria.
    """
    num_strategies_1, num_strategies_2 = A.shape
    strategy_A = np.ones(num_strategies_1) / num_strategies_1
    strategy_B = np.ones(num_strategies_2) / num_strategies_2

    for iteration_num in range(max_iterations):
        strategy_utilities_A = np.dot(A, strategy_B)
        strategy_utilities_B = np.dot(B, strategy_A)

        regrets_A = compute_regrets(strategy_utilities_A, strategy_A)
        regrets_B = compute_regrets(strategy_utilities_B, strategy_B)

        strategy_A = update_strategy(strategy_A, regrets_A, learning_rate)
        strategy_B = update_strategy(strategy_B, regrets_B, learning_rate)

    strategy_A_final = generate_abs_strategy(strategy_A)
    strategy_B_final = generate_abs_strategy(strategy_B)

    yield strategy_A_final, strategy_B_final
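
For reference, a minimal usage sketch of the new function; the zero-sum payoffs below are illustrative (taken from the accompanying unit test), not part of the module itself:

import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization

A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])  # illustrative zero-sum game
B = -A
# The function is a generator: next() runs max_iterations regret-matching updates
# and yields one strategy per player.
strategy_A, strategy_B = next(regret_minimization(A, B, learning_rate=0.1, max_iterations=100))
print(strategy_A, strategy_B)  # for this game both should be close to the uniform strategy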

Diff for: src/nashpy/egt/imitation_dynamics.py

+69
@@ -0,0 +1,69 @@
import numpy as np
from typing import Generator, Tuple, Any
import numpy.typing as npt


def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
    """
    Calculate the payoff of a player given their strategy and the opponent's strategy.

    Parameters:
    - player_strategy: numpy array representing the strategy of the player
    - opponent_strategy: numpy array representing the strategy of the opponent
    - player_payoff_matrix: numpy matrix representing the payoff matrix for the player

    Returns:
    - payoff: scalar representing the payoff of the player
    """
    return np.dot(player_strategy, np.dot(player_payoff_matrix, opponent_strategy))


def imitation_dynamics(
    A: npt.NDArray,
    B: npt.NDArray,
    population_size=100,
    num_of_generations=1000,
    random_seed=None,
    threshold=0.5,
) -> Generator[Tuple[float, float], Any, None]:
    """
    Simulate the imitation dynamics for a given game represented by payoff matrices A and B.

    Parameters:
    - A: numpy matrix representing the payoff matrix for Player 1
    - B: numpy matrix representing the payoff matrix for Player 2
    - population_size: number of individuals in the population (default: 100)
    - num_of_generations: number of generations to simulate (default: 1000)
    - random_seed: seed for reproducibility (default: None)
    - threshold: threshold value for representing strategies as 0 or 1 (default: 0.5)

    Yields:
    - nash_equilibrium_player1: numpy array representing the Nash equilibrium strategy for Player 1
    - nash_equilibrium_player2: numpy array representing the Nash equilibrium strategy for Player 2
    """
    num_strategies = len(A)

    # Initialise the populations
    if random_seed:
        np.random.seed(random_seed)  # Set the random seed for reproducibility

    population_A = np.random.dirichlet(np.ones(num_strategies), size=population_size)
    population_B = np.random.dirichlet(np.ones(num_strategies), size=population_size)

    for generation in range(num_of_generations):
        # Play the game
        payoffs_A = np.array(
            [payoff(population_A[i], population_B[i], A) for i in range(population_size)]
        )
        payoffs_B = np.array(
            [payoff(population_B[i], population_A[i], B) for i in range(population_size)]
        )

        # Update the populations: imitation dynamics in which every player copies the strategy
        # of the most successful individual
        fittest_A_index = np.argmax(payoffs_A)
        fittest_B_index = np.argmax(payoffs_B)
        population_A = np.tile(population_A[fittest_A_index], (population_size, 1))
        population_B = np.tile(population_B[fittest_B_index], (population_size, 1))

    # Calculate the Nash equilibrium strategies
    nash_equilibrium_A = np.mean(population_A, axis=0)
    nash_equilibrium_B = np.mean(population_B, axis=0)

    # Threshold the strategies so that each entry is represented as 0 or 1
    nash_equilibrium_A[nash_equilibrium_A >= threshold] = 1
    nash_equilibrium_A[nash_equilibrium_A < threshold] = 0
    nash_equilibrium_B[nash_equilibrium_B >= threshold] = 1
    nash_equilibrium_B[nash_equilibrium_B < threshold] = 0

    yield nash_equilibrium_A, nash_equilibrium_B
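
A minimal usage sketch of the generator above; the 2x2 payoffs are the same illustrative values used in the new unit tests:

import numpy as np
from nashpy.egt.imitation_dynamics import imitation_dynamics

A = np.array([[3, 0], [1, 3]])  # illustrative payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]])  # illustrative payoff matrix for Player 2
# Passing a fixed random_seed makes the Dirichlet-initialised populations,
# and hence the yielded strategies, reproducible.
eq_1, eq_2 = next(imitation_dynamics(A, B, population_size=100, num_of_generations=1000, random_seed=42))
print(eq_1, eq_2)  # 0/1 vectors after thresholding at 0.5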

Diff for: src/nashpy/game.py

+50 -1
@@ -1,5 +1,4 @@
 """A class for a normal form game"""
-
 import numpy as np
 import numpy.typing as npt
 from typing import Optional, Any
@@ -15,6 +14,8 @@
 )
 from .learning.stochastic_fictitious_play import stochastic_fictitious_play
 from .utils.is_best_response import is_best_response
+from .algorithms.regret_minimization import regret_minimization
+from .egt.imitation_dynamics import imitation_dynamics


 class Game:
@@ -423,3 +424,51 @@ def linear_program(self):
         row_strategy = linear_program(row_player_payoff_matrix=A)
         column_strategy = linear_program(row_player_payoff_matrix=B.T)
         return row_strategy, column_strategy
+
+    def regret_minimization(self, learning_rate=0.1, max_iterations=100):
+        """
+        Build the best strategy probabilities of both players using the regret minimization
+        method. The algorithm implemented here is Algorithm 4.3 (Theorem 4.4) of [Nisan2007]_
+
+        Parameters
+        ----------
+        learning_rate : float (optional, defaults to 0.1)
+            The learning rate determines the magnitude of the update towards the regrets.
+            The regrets are scaled by the learning rate before they are added to the current
+            strategy. A higher learning rate results in a larger update, while a lower learning
+            rate leads to a smaller update. This value controls the pace of convergence towards
+            a Nash equilibrium.
+        max_iterations : int (optional, defaults to 100)
+            The number of iterations; this can be increased or decreased depending on the shape
+            of the utility/payoff matrices.
+
+        Returns
+        -------
+        tuple
+            The Nash equilibria.
+        """
+        A, B = self.payoff_matrices
+        return regret_minimization(
+            A=A, B=B, learning_rate=learning_rate, max_iterations=max_iterations
+        )
+
+    def imitation_dynamics(
+        self, population_size=100, num_of_generations=1000, random_seed=None, threshold=0.5
+    ):
+        """
+        Simulate the imitation dynamics for the game represented by its payoff matrices.
+
+        Parameters
+        ----------
+        population_size : number of individuals in the population (default: 100)
+        num_of_generations : number of generations to simulate (default: 1000)
+        random_seed : seed for reproducibility (default: None)
+        threshold : threshold value for representing strategies as 0 or 1 (default: 0.5)
+
+        Returns
+        -------
+        tuple
+            The Nash equilibria.
+        """
+        A, B = self.payoff_matrices
+        return imitation_dynamics(
+            A=A,
+            B=B,
+            population_size=population_size,
+            num_of_generations=num_of_generations,
+            random_seed=random_seed,
+            threshold=threshold,
+        )
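
Assuming the package is installed from this commit, the two new Game methods might be exercised as follows; the payoff matrices are illustrative, and both methods return generators, so next() is used to obtain a single equilibrium candidate:

import numpy as np
import nashpy as nash

A = np.array([[3, 0], [1, 3]])
B = np.array([[0, 1], [3, 0]])
game = nash.Game(A, B)

row_strategy, column_strategy = next(game.regret_minimization(learning_rate=0.1, max_iterations=100))
eq_1, eq_2 = next(game.imitation_dynamics(population_size=100, num_of_generations=1000, random_seed=42))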

Diff for: tests/unit/test_imitation_dynamics.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
from nashpy.egt.imitation_dynamics import imitation_dynamics
import numpy as np
import unittest
import random


class TestImitationDynamics(unittest.TestCase):
    def test_positive_payoffs(self):
        A = np.array([[3, 0], [1, 3]])  # Payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Payoff matrix for Player 2
        nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
        # Assert that the Nash equilibrium strategies are within the expected range
        self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
        self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

    def test_negative_payoffs(self):
        A = np.array([[-1, 0], [0, -1]])  # Payoff matrix for Player 1 (negative payoffs)
        B = np.array([[0, -1], [-1, 0]])  # Payoff matrix for Player 2 (negative payoffs)
        nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
        # Assert that the Nash equilibrium strategies are within the expected range
        self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
        self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

    def test_randomness(self):
        # Define parameters for the imitation dynamics function
        A = np.array([[3, 0], [1, 3]])  # Example payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Example payoff matrix for Player 2
        population_size = 100
        num_generations = 1000

        # Run imitation dynamics several times and collect the results
        results = []
        for i in range(10):  # Run 10 iterations
            # Run imitation dynamics with random_seed left as None (random initialization)
            nash_equilibrium_player1, nash_equilibrium_player2 = next(
                imitation_dynamics(A, B, population_size, num_generations)
            )
            # Convert numpy arrays to tuples so the results are hashable
            results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2)))
        # Check that the results differ in at least one pair of iterations
        self.assertTrue(len(set(results)) > 1, "Results are not randomly generated")

    def test_random_seed_constant(self):
        # Define parameters for the imitation dynamics function
        A = np.array([[3, 0], [1, 3]])  # Example payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Example payoff matrix for Player 2
        population_size = 100
        num_generations = 1000
        random_seed = random.randrange(0, 1000)  # Fixed seed so every run of the evolution is identical

        # Run imitation dynamics several times and collect the results
        results = []
        for i in range(100):  # Run 100 iterations
            # Run imitation dynamics with the same random seed each time
            nash_equilibrium_player1, nash_equilibrium_player2 = next(
                imitation_dynamics(A, B, population_size, num_generations, random_seed)
            )
            # Convert numpy arrays to tuples so the results are hashable
            results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2)))
        # Check that every iteration produced the same result
        self.assertTrue(len(set(results)) == 1, "Results are randomly generated")


if __name__ == "__main__":
    unittest.main()

Diff for: tests/unit/test_regret_minimization.py

+40
@@ -0,0 +1,40 @@
import unittest
import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization


class TestRegretMinimization(unittest.TestCase):
    def test_regret_minimization_for_zerosum_game(self):
        # Test case values
        A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])  # Example payoff matrix for Player A
        B = -A  # Example payoff matrix for Player B (zero sum game)
        learning_rate = 0.1
        max_iterations = 100
        # Expected Nash equilibrium strategies for Players 1 and 2
        expected_nash_equilibrium_A = np.array([0.33333333, 0.33333333, 0.33333333])
        expected_nash_equilibrium_B = np.array([0.33333333, 0.33333333, 0.33333333])

        # Execute the regret minimization algorithm
        actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
            regret_minimization(A, B, learning_rate, max_iterations)
        )

        # Assert that the actual Nash equilibrium strategies match the expected strategies
        self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
        self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))

    def test_regret_minimization_non_zerosum_game(self):
        # Test case values
        A = np.array([[3, -1, 3], [-1, 3, 6], [-1, 1, 2]])
        B = np.array([[-3, 1, 4], [1, -3, 3], [-1, 3, 4]])
        learning_rate = 0.1
        max_iterations = 100

        # Expected Nash equilibrium strategies for Players 1 and 2
        expected_nash_equilibrium_A = np.array([0.0, 1.0, 0.0])
        expected_nash_equilibrium_B = np.array([0.0, 0.0, 1.0])

        # Execute the regret minimization algorithm
        actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
            regret_minimization(A, B, learning_rate, max_iterations)
        )

        # Assert that the actual Nash equilibrium strategies match the expected strategies
        self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
        self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))


if __name__ == "__main__":
    unittest.main()
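
Assuming the repository's existing test layout, the new unit tests can presumably be run from the project root with pytest, for example:

python -m pytest tests/unit/test_regret_minimization.py tests/unit/test_imitation_dynamics.py

Both modules also guard unittest.main(), so they can be executed directly as scripts.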
