|
147 | 147 | " E = self.grid_size + VISIBLE_RADIUS - 1\n",
|
148 | 148 | " gps = [(E, E), (S, E), (E, S), (S, S)]\n",
|
149 | 149 | " gp = gps[random.randint(0, len(gps)-1)]\n",
|
150 | | - " self.grid[gp] = GOAL_VALUE\n",
| 150 | + " gy, gx = gp\n",
| 151 | + " self.grid[gy, gx] = GOAL_VALUE\n",
151 | 152 | " \n",
|
152 | 153 | " def visible(self, pos):\n",
|
153 | 154 | " y, x = pos\n",
|
|
237 | 238 | " def record_step(self):\n",
|
238 | 239 | " \"\"\"Add the current state to history for display later\"\"\"\n",
|
239 | 240 | " grid = np.array(self.grid.grid)\n",
|
240 | | - " grid[self.agent.pos] = self.agent.health * 0.5 # Agent marker faded by health\n",
| 241 | + " y, x = self.agent.pos\n",
| 242 | + " grid[y, x] = self.agent.health * 0.5 # Agent marker faded by health\n",
241 | 243 | " visible = np.array(self.grid.visible(self.agent.pos))\n",
|
242 | 244 | " self.history.append((grid, visible, self.agent.health))\n",
|
243 | 245 | " \n",
|
|
256 | 258 | " self.agent.act(action)\n",
|
257 | 259 | " \n",
|
258 | 260 | " # Get reward from where agent landed, add to agent health\n",
|
259 | | - " value = self.grid.grid[self.agent.pos]\n",
260 | | - " self.grid.grid[self.agent.pos] = 0\n",
| 261 | + " y, x = self.agent.pos\n",
| 262 | + " value = self.grid.grid[y, x]\n",
| 263 | + " self.grid.grid[y, x] = 0\n",
261 | 264 | " self.agent.health += value\n",
|
262 | 265 | " \n",
|
263 | 266 | " # Check if agent won (reached the goal) or lost (health reached 0)\n",
|
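The hunks above all apply the same pattern: the agent or goal position is unpacked into y, x before indexing the NumPy grid. A plausible motivation (an assumption, not stated in the commit) is that NumPy treats a list index differently from a tuple index, so unpacking makes single-cell access explicit however the position happens to be stored. A minimal sketch of the difference:

```python
import numpy as np

grid = np.zeros((5, 5))
pos = [2, 3]  # hypothetical position stored as a list rather than a tuple

# Unpacked (or tuple) indexing touches exactly one cell.
y, x = pos
grid[y, x] = 1.0   # same cell as grid[(2, 3)]

# A list index is fancy indexing: it selects whole rows 2 and 3 instead.
grid[pos] = 0.5    # overwrites every element of rows 2 and 3
```

With a genuine (y, x) tuple both forms select a single cell, so the change can also be read as a pure clarity fix.
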
|
707 | 710 | "metadata": {
|
708 | 711 | "collapsed": false
|
709 | 712 | },
|
710 | | - "outputs": [ ],
| 713 | + "outputs": [],
711 | 714 | "source": [
|
712 | 715 | "hidden_size = 50\n",
|
713 | 716 | "learning_rate = 1e-4\n",
|
|