Tensor game runner and env #149

Open · wants to merge 76 commits into base: main
Commits
2979751
tests are passing
alexandrasouly Jan 23, 2023
02844d7
tensor game
alexandrasouly Jan 25, 2023
f57b28a
new payoff matrix
alexandrasouly Jan 25, 2023
7844f25
moved to tensor repo
alexandrasouly Jan 25, 2023
ca09efe
works, before fixing wandb
alexandrasouly Jan 29, 2023
ba36084
why this not work i do not know
alexandrasouly Jan 29, 2023
47c9004
fixed wandb
alexandrasouly Jan 29, 2023
9781add
tried using strategies
alexandrasouly Jan 29, 2023
2615e66
added tft variant and tests
alexandrasouly Jan 31, 2023
dbffc3a
new strategies
alexandrasouly Jan 31, 2023
3de0abc
experiments
alexandrasouly Feb 8, 2023
efefe56
adding 3ppl eval
Aidandos Feb 8, 2023
5d03ca2
fixed bug
Aidandos Feb 8, 2023
2a0bf28
changing wandb group
Aidandos Feb 8, 2023
374304a
fixing small bug
Aidandos Feb 8, 2023
7fbd58b
change all configs to include num_inner_steps
Aidandos Feb 8, 2023
ab42d4d
no annealing
Aidandos Feb 8, 2023
db52275
correct payoff
alexandrasouly Feb 8, 2023
742ff83
conflict
alexandrasouly Feb 8, 2023
3e80315
sort of works
alexandrasouly Feb 21, 2023
85788d8
results are still dodge
alexandrasouly Feb 21, 2023
799c136
half of merge conflicts
alexandrasouly Feb 22, 2023
177fbee
things run again
alexandrasouly Feb 23, 2023
a80ee95
formatting
alexandrasouly Feb 23, 2023
d6c19ae
refactored coop logging func and formatted
alexandrasouly Feb 23, 2023
b556cfe
updated
akbir Feb 23, 2023
1e2a379
updated
akbir Feb 23, 2023
7f2144f
time update
akbir Feb 23, 2023
161b011
gs_v_tabular
alexandrasouly Feb 26, 2023
b6215b3
conflict
alexandrasouly Feb 26, 2023
29ebbb3
before moving stuff around for debugging
alexandrasouly Feb 26, 2023
26ef6f3
rng bug fixed
alexandrasouly Feb 28, 2023
006540f
rng bug fixed, ppo_mem runnable
alexandrasouly Mar 12, 2023
3f0659f
started n player
alexandrasouly Mar 17, 2023
a7a33e4
pulled from main
alexandrasouly Mar 17, 2023
4f76571
testing shaping on cluster
Mar 17, 2023
baebae1
testing shaping yaml
Mar 17, 2023
a96fda7
fixed rngs in runners based on main
alexandrasouly Mar 18, 2023
e64c825
evo new params and more opps
alexandrasouly Mar 18, 2023
e6ce23a
added 3player_ipd experiments
alexandrasouly Mar 18, 2023
e142a87
conflict
alexandrasouly Mar 18, 2023
45078bf
conflict
alexandrasouly Mar 18, 2023
4c289fd
tabular fixed
alexandrasouly Mar 18, 2023
149e0ed
Merge branch 'alex_learning' of github.com:akbir/pax into alex_learning
alexandrasouly Mar 18, 2023
66ebabe
eval works
alexandrasouly Mar 26, 2023
151ed79
n player tests pass
alexandrasouly Mar 27, 2023
93a7729
started adding n player evo
alexandrasouly Mar 28, 2023
f0ef2ce
n player evo runner works
alexandrasouly Mar 28, 2023
af72da0
3pl runs again
alexandrasouly Mar 28, 2023
0efde8c
fixed 2player agent setup
alexandrasouly Mar 28, 2023
179b465
wandb runs for n_evo but state vis is not working
alexandrasouly Mar 28, 2023
eff177a
wandb logging fixed
alexandrasouly Mar 28, 2023
22c793d
cant believe 8player game actually ran"
alexandrasouly Mar 28, 2023
0a8bebe
n player eval works
alexandrasouly Apr 3, 2023
d8d810f
format
alexandrasouly Apr 3, 2023
1e517b2
nplayer ppo config files
alexandrasouly Apr 3, 2023
8f29b7d
Merge branch 'alex_learning' of github.com:akbir/pax into alex_learning
alexandrasouly Apr 3, 2023
1e67c74
fix up config
alexandrasouly Apr 4, 2023
483da97
Merge branch 'alex_learning' of github.com:akbir/pax into alex_learning
alexandrasouly Apr 4, 2023
5be995a
nplayer rl runner works
alexandrasouly Apr 4, 2023
9903d05
Merge branch 'alex_learning' of github.com:akbir/pax into alex_learning
alexandrasouly Apr 4, 2023
c98ee58
payoff from paper
alexandrasouly Apr 17, 2023
6a5d5d9
Merge branch 'alex_learning' of github.com:akbir/pax into alex_learning
alexandrasouly Apr 17, 2023
d1f9dbb
stag hunt and snowdrift payoffs
alexandrasouly Apr 18, 2023
896bcd8
Merge branch 'alex_learning' of https://github.com/akbir/pax into ale…
alexandrasouly Apr 18, 2023
79d9869
added tc
alexandrasouly Apr 18, 2023
f88c7a7
added runtime debug scripts
alexandrasouly Apr 29, 2023
25c26b3
fix scripts and add runner log
alexandrasouly Apr 29, 2023
dfcb7aa
n player scripts and baseline strategies
alexandrasouly Apr 29, 2023
91f295f
typo
alexandrasouly Apr 29, 2023
0f5dcb8
added global welfare and grouped visitations
alexandrasouly May 9, 2023
12996ee
added more shaper 5pl configs
alexandrasouly May 9, 2023
640aa75
fix global welfare
alexandrasouly May 9, 2023
242862d
fix global welfare2
alexandrasouly May 9, 2023
094250c
naive scripts
alexandrasouly May 28, 2023
fc2624d
tft scripts
alexandrasouly May 28, 2023
26 changes: 13 additions & 13 deletions .pre-commit-config.yaml
@@ -1,17 +1,17 @@
 repos:
-  - repo: https://github.com/ambv/black
-    rev: 22.6.0
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
     hooks:
       - id: black
         language_version: python3.9
-  - repo: https://github.com/pycqa/flake8
-    rev: '3.9.2'
-    hooks:
-      - id: flake8
-        additional_dependencies: [flake8-bugbear]
-        args: [
-          "--show-source",
-          "--ignore=E203,E266,E501,W503,F403,F401,B008,E712",
-          "--max-line-length=100",
-          "--max-complexity=18",
-          "--select=B,C,E,F,W,T4,B9"]
+#  - repo: https://github.com/pycqa/flake8
+#    rev: '3.9.2'
+#    hooks:
+#      - id: flake8
+#        additional_dependencies: [flake8-bugbear]
+#        args: [
+#          "--show-source",
+#          "--ignore=E203,E266,E501,W503,F403,F401,B008,E712",
+#          "--max-line-length=100",
+#          "--max-complexity=18",
+#          "--select=B,C,E,F,W,T4,B9"]
7,845 changes: 7,845 additions & 0 deletions experiment.log

Large diffs are not rendered by default.

54 changes: 54 additions & 0 deletions mess.ipynb
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "unsupported operand type(s) for +: 'int' and 'str'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mjax\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mjnp\u001b[39;00m\n\u001b[1;32m 2\u001b[0m num_players \u001b[39m=\u001b[39m \u001b[39m4\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39msum\u001b[39;49m(\u001b[39mbin\u001b[39;49m(\u001b[39m2\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49m(num_players \u001b[39m-\u001b[39;49m \u001b[39m1\u001b[39;49m)\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m)))\n\u001b[1;32m 5\u001b[0m \u001b[39mprint\u001b[39m(jnp\u001b[39m.\u001b[39mbitwise_and(\u001b[39m2\u001b[39m, \u001b[39m2\u001b[39m\u001b[39m*\u001b[39m\u001b[39m*\u001b[39m(num_players \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m)\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m))\n\u001b[1;32m 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mnp\u001b[39;00m\n",
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'int' and 'str'"
]
}
],
"source": [
"import jax.numpy as jnp\n",
"num_players = 4\n",
"\n",
"print(sum(bin(2**(num_players - 1)-1)))\n",
"print(jnp.bitwise_and(2, 2**(num_players - 1)-1))\n",
"\n",
"import numpy as np\n",
"np.binary_repr(2**(num_players - 1)-1)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pax",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
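The notebook's `TypeError` comes from summing a string: `bin()` returns a string such as `'0b111'`, and `sum()` starts from the integer `0`, so the first `0 + '0'` fails. A minimal sketch of what the cell appears to be after (counting set bits in a bitmask of `num_players - 1` opponents; `mask` is my name for the intermediate value, not one from the notebook):

```python
import numpy as np

num_players = 4
mask = 2 ** (num_players - 1) - 1  # 0b111, as constructed in the notebook

# bin(mask) is the string '0b111', so sum(bin(mask)) raises
# TypeError: unsupported operand type(s) for +: 'int' and 'str'.
# Counting the '1' characters gives the number of set bits instead:
set_bits = bin(mask).count("1")
print(set_bits)  # prints 3

# np.binary_repr gives the same digits without the '0b' prefix:
print(np.binary_repr(mask))  # prints 111
```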
3 changes: 3 additions & 0 deletions outputs/2023-01-18/19-35-11/experiment.log
@@ -0,0 +1,3 @@
[2023-01-18 19:35:11,465][root][INFO] - => Global setup ...
[2023-01-18 19:35:11,465][root][INFO] - => Done in 287.056 us
[2023-01-18 19:35:11,465][root][INFO] -
3 changes: 2 additions & 1 deletion pax/agents/agent.py
@@ -1,8 +1,9 @@
 from typing import Tuple

-from pax.utils import MemoryState, TrainingState
 import jax.numpy as jnp
+
+from pax.utils import MemoryState, TrainingState


 class AgentInterface:
     """Interface for agents to interact with runners and environemnts.
4 changes: 3 additions & 1 deletion pax/agents/hyper/ppo.py
@@ -329,7 +329,9 @@ def model_update_epoch(
         return new_state, new_mem, metrics

     @jax.jit
-    def make_initial_state(key: Any, hidden: jnp.ndarray) -> TrainingState:
+    def make_initial_state(
+        key: Any, hidden: jnp.ndarray
+    ) -> Tuple[TrainingState, MemoryState]:
         """Initialises the training state (parameters and optimiser state)."""
         key, subkey = jax.random.split(key)
         dummy_obs = jnp.zeros(shape=obs_spec)
2 changes: 1 addition & 1 deletion pax/agents/naive_exact.py
@@ -2,8 +2,8 @@

 import jax
 import jax.numpy as jnp
-from pax.agents.agent import AgentInterface

+from pax.agents.agent import AgentInterface
 from pax.envs.infinite_matrix_game import EnvParams as InfiniteMatrixGameParams
 from pax.utils import MemoryState

8 changes: 5 additions & 3 deletions pax/agents/ppo/ppo.py
@@ -10,10 +10,10 @@
 from pax import utils
 from pax.agents.agent import AgentInterface
 from pax.agents.ppo.networks import (
-    make_ipditm_network,
-    make_sarl_network,
     make_coingame_network,
     make_ipd_network,
+    make_ipditm_network,
+    make_sarl_network,
 )
 from pax.utils import Logger, MemoryState, TrainingState, get_advantages

@@ -336,7 +336,9 @@ def model_update_epoch(

         return new_state, new_memory, metrics

-    def make_initial_state(key: Any, hidden: jnp.ndarray) -> TrainingState:
+    def make_initial_state(
+        key: Any, hidden: jnp.ndarray
+    ) -> Tuple[TrainingState, MemoryState]:
         """Initialises the training state (parameters and optimiser state)."""
         key, subkey = jax.random.split(key)

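The second hunk above only widens the return annotation: `make_initial_state` returns a `(TrainingState, MemoryState)` pair, and the old annotation claimed just `TrainingState`. A minimal self-contained sketch of the corrected signature, using stand-in `NamedTuple`s rather than the real pax classes (which hold network params, optimiser state, and recurrent memory):

```python
from typing import Any, NamedTuple, Tuple

class TrainingState(NamedTuple):
    # Stand-in for pax.utils.TrainingState
    step: int

class MemoryState(NamedTuple):
    # Stand-in for pax.utils.MemoryState
    hidden: Any

def make_initial_state(key: Any, hidden: Any) -> Tuple[TrainingState, MemoryState]:
    # The function builds and returns both pieces, which is why the diff
    # changes the annotation from TrainingState to the Tuple form.
    return TrainingState(step=0), MemoryState(hidden=hidden)

state, mem = make_initial_state(key=None, hidden=0.0)
```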
9 changes: 9 additions & 0 deletions pax/agents/ppo/ppo_gru.py
@@ -517,6 +517,15 @@ def make_gru_agent(
         network, initial_hidden_state = make_GRU_ipd_network(
             action_spec, agent_args.hidden_size
         )
+    elif args.env_id == "iterated_tensor_game":
+        network, initial_hidden_state = make_GRU_ipd_network(
+            action_spec, agent_args.hidden_size
+        )
+
+    elif args.env_id == "iterated_nplayer_tensor_game":
+        network, initial_hidden_state = make_GRU_ipd_network(
+            action_spec, agent_args.hidden_size
+        )

     elif args.env_id == "InTheMatrix":
         network, initial_hidden_state = make_GRU_ipditm_network(
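Both branches added here call the same `make_GRU_ipd_network` constructor, so a single membership test would cover them. This is a hypothetical consolidation sketch, not code from the PR; the constructor is stubbed out (the real one returns a network and its initial hidden state):

```python
def make_GRU_ipd_network(action_spec, hidden_size):
    # Stub standing in for pax's real constructor.
    return f"gru({action_spec},{hidden_size})", "hidden0"

def select_network(env_id, action_spec, hidden_size):
    # The two env ids below are the ones added in this diff; since they
    # share a constructor, one membership check replaces both elif branches.
    if env_id in ("iterated_tensor_game", "iterated_nplayer_tensor_game"):
        return make_GRU_ipd_network(action_spec, hidden_size)
    raise ValueError(f"no GRU network registered for env_id={env_id!r}")

network, initial_hidden_state = select_network(
    "iterated_nplayer_tensor_game", action_spec=2, hidden_size=16
)
```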
6 changes: 3 additions & 3 deletions pax/agents/strategies.py
@@ -4,8 +4,8 @@

 import jax.numpy as jnp
 import jax.random
-from pax.agents.agent import AgentInterface

+from pax.agents.agent import AgentInterface
 from pax.utils import Logger, MemoryState, TrainingState

 # states are [CC, CD, DC, DD, START]
@@ -381,7 +381,7 @@ def _policy(

     def _reciprocity(self, obs: jnp.ndarray, *args) -> jnp.ndarray:
         # now either 0, 1, 2, 3
-        batch_size, _ = obs.shape
+        # batch_size, _ = obs.shape
         obs = obs.argmax(axis=-1)
         # if 0 | 2 | 4 -> C
         # if 1 | 3 -> D
@@ -488,7 +488,7 @@ def make_initial_state(self, _unused, *args) -> TrainingState:


 class Stay(AgentInterface):
-    def __init__(self, num_actions: int, num_envs: int):
+    def __init__(self, num_actions: int, num_envs: int, num_players: int = 2):
         self.make_initial_state = initial_state_fun(num_envs)
         self._state, self._mem = self.make_initial_state(None, None)
         self._logger = Logger()
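The `Stay` change gives the new `num_players` parameter a default of 2, so existing two-player call sites keep working while the n-player runners can pass their own count. A minimal sketch of this backwards-compatible pattern, with stand-in attribute assignments rather than the pax implementation:

```python
class Stay:
    # Sketch only: defaulting the new parameter keeps old call sites valid.
    def __init__(self, num_actions: int, num_envs: int, num_players: int = 2):
        self.num_actions = num_actions
        self.num_envs = num_envs
        self.num_players = num_players

old_style = Stay(num_actions=2, num_envs=8)                # two-player call site
n_player = Stay(num_actions=2, num_envs=8, num_players=5)  # n-player runner
```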