
Commit c59ceee

Imitation Dynamics and Regret Minimization models added to the repo
1 parent 335a8ed commit c59ceee

File tree: 5 files changed (+305, -1 lines)

Diff for: src/nashpy/algorithms/regret_minimization.py

+85
@@ -0,0 +1,85 @@
import numpy as np
from typing import Generator, Tuple, Any
import numpy.typing as npt


def compute_regrets(strategy_utilities, current_strategy):
    """
    Calculate the regrets for a player from the strategy utilities and the current strategy.
    Regrets represent the difference between the utility achieved by playing a strategy and the
    maximum utility that could have been achieved by playing any strategy.
    In this implementation only positive regrets are kept.
    """
    regrets = np.maximum(0, strategy_utilities - current_strategy)
    return regrets


def update_strategy(current_strategy, regrets, learning_rate):
    """
    Update the player's strategy from the regrets, the current strategy, and a fixed learning rate.
    The regrets are scaled by the learning rate and added to the current strategy.
    The updated strategy is then normalised so that the probabilities sum to 1.
    """
    updated_strategy = current_strategy + learning_rate * regrets
    return updated_strategy / np.sum(updated_strategy)


def generate_abs_strategy(strategy_list):
    """
    Return the most favourable strategy for a player: uniform probability over the entries with
    the maximum probability value and zero elsewhere.
    """
    max_probability = max(strategy_list)
    strategy_relative = [1 if x == max_probability else 0 for x in strategy_list]
    sum_value_in_the_list = sum(strategy_relative)
    favorable_strategy = [x / sum_value_in_the_list for x in strategy_relative]
    return favorable_strategy


def regret_minimization(
    A: npt.NDArray, B: npt.NDArray, learning_rate=0.1, max_iterations=100
) -> Generator[Tuple[float, float], Any, None]:
    """
    Obtain the Nash equilibria using the regret minimization method over a fixed number of
    iterations. The implementation is based on regret matching with a fixed learning rate.

    The algorithm implemented here is Algorithm 4.3 (Theorem 4.4) of [Nisan2007]_

    1. Build the best strategy probabilities of both players.

    Parameters
    ----------
    A : array
        The row player utility matrix.
    B : array
        The column player utility matrix.
    learning_rate : float (optional, defaults to 0.1)
        The learning rate determines the magnitude of the update towards the regrets.
        The regrets are scaled by the learning rate before they are added to the current strategy.
        A higher learning rate results in a larger update, while a lower learning rate leads to a
        smaller update. This value controls the pace of convergence towards a Nash equilibrium.
    max_iterations : int (optional, defaults to 100)
        The number of iterations; this can be increased or decreased depending on the shape of
        the utility/payoff matrices.

    Yields
    ------
    Generator
        The equilibria.
    """
    num_strategies_1, num_strategies_2 = A.shape
    strategy_A = np.ones(num_strategies_1) / num_strategies_1
    strategy_B = np.ones(num_strategies_2) / num_strategies_2

    for iteration_num in range(max_iterations):
        strategy_utilities_A = np.dot(A, strategy_B)
        strategy_utilities_B = np.dot(B, strategy_A)

        regrets_A = compute_regrets(strategy_utilities_A, strategy_A)
        regrets_B = compute_regrets(strategy_utilities_B, strategy_B)

        strategy_A = update_strategy(strategy_A, regrets_A, learning_rate)
        strategy_B = update_strategy(strategy_B, regrets_B, learning_rate)

    strategy_A_final = generate_abs_strategy(strategy_A)
    strategy_B_final = generate_abs_strategy(strategy_B)

    yield strategy_A_final, strategy_B_final
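
For reference, a minimal usage sketch of the new function; the zero-sum payoffs below are illustrative (taken from the accompanying unit test), not part of the module itself:

import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization

A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])  # illustrative zero-sum game
B = -A
# The function is a generator: next() runs max_iterations regret-matching updates
# and yields one strategy per player.
strategy_A, strategy_B = next(regret_minimization(A, B, learning_rate=0.1, max_iterations=100))
print(strategy_A, strategy_B)  # for this game both should be close to the uniform strategy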

Diff for: src/nashpy/egt/imitation_dynamics.py

+69
@@ -0,0 +1,69 @@
import numpy as np
from typing import Generator, Tuple, Any
import numpy.typing as npt


def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
    """
    Calculate the payoff of a player given their strategy and the opponent's strategy.

    Parameters:
    - player_strategy: numpy array representing the strategy of the player
    - opponent_strategy: numpy array representing the strategy of the opponent
    - player_payoff_matrix: numpy matrix representing the payoff matrix for the player

    Returns:
    - payoff: scalar representing the payoff of the player
    """
    return np.dot(player_strategy, np.dot(player_payoff_matrix, opponent_strategy))


def imitation_dynamics(
    A: npt.NDArray,
    B: npt.NDArray,
    population_size=100,
    num_of_generations=1000,
    random_seed=None,
    threshold=0.5,
) -> Generator[Tuple[float, float], Any, None]:
    """
    Simulate the imitation dynamics for a given game represented by payoff matrices A and B.

    Parameters:
    - A: numpy matrix representing the payoff matrix for Player 1
    - B: numpy matrix representing the payoff matrix for Player 2
    - population_size: number of individuals in the population (default: 100)
    - num_of_generations: number of generations to simulate (default: 1000)
    - random_seed: seed for reproducibility (default: None)
    - threshold: threshold value for representing strategies as 0 or 1 (default: 0.5)

    Yields:
    - nash_equilibrium_player1: numpy array representing the Nash equilibrium strategy for Player 1
    - nash_equilibrium_player2: numpy array representing the Nash equilibrium strategy for Player 2
    """
    num_strategies = len(A)

    # Initialise the populations
    if random_seed:
        np.random.seed(random_seed)  # Set the random seed for reproducibility

    population_A = np.random.dirichlet(np.ones(num_strategies), size=population_size)
    population_B = np.random.dirichlet(np.ones(num_strategies), size=population_size)

    for generation in range(num_of_generations):
        # Play the game
        payoffs_A = np.array(
            [payoff(population_A[i], population_B[i], A) for i in range(population_size)]
        )
        payoffs_B = np.array(
            [payoff(population_B[i], population_A[i], B) for i in range(population_size)]
        )

        # Update the populations: imitation dynamics in which every player copies the strategy
        # of the most successful individual
        fittest_A_index = np.argmax(payoffs_A)
        fittest_B_index = np.argmax(payoffs_B)
        population_A = np.tile(population_A[fittest_A_index], (population_size, 1))
        population_B = np.tile(population_B[fittest_B_index], (population_size, 1))

    # Calculate the Nash equilibrium strategies
    nash_equilibrium_A = np.mean(population_A, axis=0)
    nash_equilibrium_B = np.mean(population_B, axis=0)

    # Threshold the strategies so that each entry is represented as 0 or 1
    nash_equilibrium_A[nash_equilibrium_A >= threshold] = 1
    nash_equilibrium_A[nash_equilibrium_A < threshold] = 0
    nash_equilibrium_B[nash_equilibrium_B >= threshold] = 1
    nash_equilibrium_B[nash_equilibrium_B < threshold] = 0

    yield nash_equilibrium_A, nash_equilibrium_B
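
A minimal usage sketch of the generator above; the 2x2 payoffs are the same illustrative values used in the new unit tests:

import numpy as np
from nashpy.egt.imitation_dynamics import imitation_dynamics

A = np.array([[3, 0], [1, 3]])  # illustrative payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]])  # illustrative payoff matrix for Player 2
# Passing a fixed random_seed makes the Dirichlet-initialised populations,
# and hence the yielded strategies, reproducible.
eq_1, eq_2 = next(imitation_dynamics(A, B, population_size=100, num_of_generations=1000, random_seed=42))
print(eq_1, eq_2)  # 0/1 vectors after thresholding at 0.5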

Diff for: src/nashpy/game.py

+50 -1
@@ -1,5 +1,4 @@
 """A class for a normal form game"""
-
 import numpy as np
 import numpy.typing as npt
 from typing import Optional, Any
@@ -15,6 +14,8 @@
 )
 from .learning.stochastic_fictitious_play import stochastic_fictitious_play
 from .utils.is_best_response import is_best_response
+from .algorithms.regret_minimization import regret_minimization
+from .egt.imitation_dynamics import imitation_dynamics


 class Game:
@@ -423,3 +424,51 @@ def linear_program(self):
         row_strategy = linear_program(row_player_payoff_matrix=A)
         column_strategy = linear_program(row_player_payoff_matrix=B.T)
         return row_strategy, column_strategy
+
+    def regret_minimization(self, learning_rate=0.1, max_iterations=100):
+        """
+        Build the best strategy probabilities of both players using the regret minimization
+        method. The algorithm implemented here is Algorithm 4.3 (Theorem 4.4) of [Nisan2007]_
+
+        Parameters
+        ----------
+        learning_rate : float (optional, defaults to 0.1)
+            The learning rate determines the magnitude of the update towards the regrets.
+            The regrets are scaled by the learning rate before they are added to the current
+            strategy. A higher learning rate results in a larger update, while a lower learning
+            rate leads to a smaller update. This value controls the pace of convergence towards
+            a Nash equilibrium.
+        max_iterations : int (optional, defaults to 100)
+            The number of iterations; this can be increased or decreased depending on the shape
+            of the utility/payoff matrices.
+
+        Returns
+        -------
+        tuple
+            The Nash equilibria.
+        """
+        A, B = self.payoff_matrices
+        return regret_minimization(
+            A=A, B=B, learning_rate=learning_rate, max_iterations=max_iterations
+        )
+
+    def imitation_dynamics(
+        self, population_size=100, num_of_generations=1000, random_seed=None, threshold=0.5
+    ):
+        """
+        Simulate the imitation dynamics for the game represented by its payoff matrices.
+
+        Parameters
+        ----------
+        population_size : number of individuals in the population (default: 100)
+        num_of_generations : number of generations to simulate (default: 1000)
+        random_seed : seed for reproducibility (default: None)
+        threshold : threshold value for representing strategies as 0 or 1 (default: 0.5)
+
+        Returns
+        -------
+        tuple
+            The Nash equilibria.
+        """
+        A, B = self.payoff_matrices
+        return imitation_dynamics(
+            A=A,
+            B=B,
+            population_size=population_size,
+            num_of_generations=num_of_generations,
+            random_seed=random_seed,
+            threshold=threshold,
+        )
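
Assuming the package is installed from this commit, the two new Game methods might be exercised as follows; the payoff matrices are illustrative, and both methods return generators, so next() is used to obtain a single equilibrium candidate:

import numpy as np
import nashpy as nash

A = np.array([[3, 0], [1, 3]])
B = np.array([[0, 1], [3, 0]])
game = nash.Game(A, B)

row_strategy, column_strategy = next(game.regret_minimization(learning_rate=0.1, max_iterations=100))
eq_1, eq_2 = next(game.imitation_dynamics(population_size=100, num_of_generations=1000, random_seed=42))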

Diff for: tests/unit/test_imitation_dynamics.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
from nashpy.egt.imitation_dynamics import imitation_dynamics
import numpy as np
import unittest
import random


class TestImitationDynamics(unittest.TestCase):
    def test_positive_payoffs(self):
        A = np.array([[3, 0], [1, 3]])  # Payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Payoff matrix for Player 2
        nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
        # Assert that the Nash equilibrium strategies are within the expected range
        self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
        self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

    def test_negative_payoffs(self):
        A = np.array([[-1, 0], [0, -1]])  # Payoff matrix for Player 1 (negative payoffs)
        B = np.array([[0, -1], [-1, 0]])  # Payoff matrix for Player 2 (negative payoffs)
        nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
        # Assert that the Nash equilibrium strategies are within the expected range
        self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
        self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
        self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

    def test_randomness(self):
        # Define parameters for the imitation dynamics function
        A = np.array([[3, 0], [1, 3]])  # Example payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Example payoff matrix for Player 2
        population_size = 100
        num_generations = 1000

        # Run imitation dynamics several times and collect the results
        results = []
        for i in range(10):  # Run 10 iterations
            # Run imitation dynamics with random_seed left as None (random initialization)
            nash_equilibrium_player1, nash_equilibrium_player2 = next(
                imitation_dynamics(A, B, population_size, num_generations)
            )
            # Convert numpy arrays to tuples so the results are hashable
            results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2)))
        # Check that the results differ in at least one pair of iterations
        self.assertTrue(len(set(results)) > 1, "Results are not randomly generated")

    def test_random_seed_constant(self):
        # Define parameters for the imitation dynamics function
        A = np.array([[3, 0], [1, 3]])  # Example payoff matrix for Player 1
        B = np.array([[0, 1], [3, 0]])  # Example payoff matrix for Player 2
        population_size = 100
        num_generations = 1000
        random_seed = random.randrange(0, 1000)  # Fixed seed so every run of the evolution is identical

        # Run imitation dynamics several times and collect the results
        results = []
        for i in range(100):  # Run 100 iterations
            # Run imitation dynamics with the same random seed each time
            nash_equilibrium_player1, nash_equilibrium_player2 = next(
                imitation_dynamics(A, B, population_size, num_generations, random_seed)
            )
            # Convert numpy arrays to tuples so the results are hashable
            results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2)))
        # Check that every iteration produced the same result
        self.assertTrue(len(set(results)) == 1, "Results are randomly generated")


if __name__ == "__main__":
    unittest.main()

Diff for: tests/unit/test_regret_minimization.py

+40
@@ -0,0 +1,40 @@
import unittest
import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization


class TestRegretMinimization(unittest.TestCase):
    def test_regret_minimization_for_zerosum_game(self):
        # Test case values
        A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])  # Example payoff matrix for Player A
        B = -A  # Example payoff matrix for Player B (zero sum game)
        learning_rate = 0.1
        max_iterations = 100
        # Expected Nash equilibrium strategies for Players 1 and 2
        expected_nash_equilibrium_A = np.array([0.33333333, 0.33333333, 0.33333333])
        expected_nash_equilibrium_B = np.array([0.33333333, 0.33333333, 0.33333333])

        # Execute the regret minimization algorithm
        actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
            regret_minimization(A, B, learning_rate, max_iterations)
        )

        # Assert that the actual Nash equilibrium strategies match the expected strategies
        self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
        self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))

    def test_regret_minimization_non_zerosum_game(self):
        # Test case values
        A = np.array([[3, -1, 3], [-1, 3, 6], [-1, 1, 2]])
        B = np.array([[-3, 1, 4], [1, -3, 3], [-1, 3, 4]])
        learning_rate = 0.1
        max_iterations = 100

        # Expected Nash equilibrium strategies for Players 1 and 2
        expected_nash_equilibrium_A = np.array([0.0, 1.0, 0.0])
        expected_nash_equilibrium_B = np.array([0.0, 0.0, 1.0])

        # Execute the regret minimization algorithm
        actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
            regret_minimization(A, B, learning_rate, max_iterations)
        )

        # Assert that the actual Nash equilibrium strategies match the expected strategies
        self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
        self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))


if __name__ == "__main__":
    unittest.main()
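
Assuming the repository's existing test layout, the new unit tests can presumably be run from the project root with pytest, for example:

python -m pytest tests/unit/test_regret_minimization.py tests/unit/test_imitation_dynamics.py

Both modules also guard unittest.main(), so they can be executed directly as scripts.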
