Source code for nashpy.learning.fictitious_play

"""Code to carry out fictitious learning"""

import numpy as np
import numpy.typing as npt
from typing import Generator, Optional, Any


def get_best_response_to_play_count(A: npt.NDArray, play_count: npt.NDArray) -> int:
    """
    Returns the best response to a belief based on the playing distribution of
    the opponent.

    Parameters
    ----------
    A : array
        The utility matrix.
    play_count : array
        The play counts.

    Returns
    -------
    int
        The action that corresponds to the best response.
    """
    utilities = A @ play_count
    # Break ties between equally good actions uniformly at random.
    return np.random.choice(
        np.argwhere(utilities == np.max(utilities)).transpose()[0]
    )
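# A minimal illustrative sketch (the matrix and counts below are assumptions,
# not taken from the library): with A = [[3, 0], [1, 3]] and an opponent play
# count of [4, 1], the expected utilities are A @ [4, 1] = [12, 7], so action
# 0 is the unique best response and is returned deterministically:
#
#     >>> get_best_response_to_play_count(
#     ...     np.array([[3, 0], [1, 3]]), np.array([4, 1])
#     ... )
#     0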
def update_play_count(play_count: npt.NDArray, play: int) -> npt.NDArray:
    """
    Update a belief vector with a given play.

    Parameters
    ----------
    play_count : array
        The play counts.
    play : int
        The given play.

    Returns
    -------
    array
        The updated play counts.
    """
    extra_play = np.zeros(play_count.shape)
    extra_play[play] = 1
    return play_count + extra_play
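# Illustrative sketch (assumed values): recording a play of action 0 against
# existing counts [2, 1] increments the first entry. The result is a float
# array because ``np.zeros`` defaults to a float dtype:
#
#     >>> update_play_count(np.array([2, 1]), play=0)
#     array([3., 1.])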
def fictitious_play(
    A: npt.NDArray, B: npt.NDArray, iterations: int, play_counts: Optional[Any] = None
) -> Generator:
    """
    Implement fictitious play.

    Parameters
    ----------
    A : array
        The row player payoff matrix.
    B : array
        The column player payoff matrix.
    iterations : int
        The number of iterations of the algorithm.
    play_counts : Optional
        The initial play counts (defaults to zero for every action).

    Yields
    ------
    Generator
        The play counts.
    """
    if play_counts is None:
        play_counts = [
            np.array([0 for _ in range(dimension)]) for dimension in A.shape
        ]

    yield play_counts

    for repetition in range(iterations):
        # Each player best-responds to the opponent's historic play counts;
        # the column player's utilities are given by the transpose of B.
        plays = [
            get_best_response_to_play_count(matrix, play_count)
            for matrix, play_count in zip((A, B.transpose()), play_counts[::-1])
        ]
        # Record the chosen actions in the respective players' play counts.
        play_counts = [
            update_play_count(play_count, play)
            for play_count, play in zip(play_counts, plays)
        ]
        yield play_counts
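# Illustrative usage sketch (the game, seed, and iteration count are
# assumptions): for matching pennies, a zero-sum game, the empirical play
# frequencies should approach the (1/2, 1/2) mixed equilibrium:
#
#     >>> import numpy as np
#     >>> A = np.array([[1, -1], [-1, 1]])
#     >>> B = -A
#     >>> np.random.seed(0)
#     >>> play_counts = tuple(fictitious_play(A, B, iterations=500))
#     >>> row_counts, column_counts = play_counts[-1]
#     >>> row_counts / row_counts.sum()  # approximately array([0.5, 0.5])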