|
147 | 147 | " E = self.grid_size + VISIBLE_RADIUS - 1\n",
|
148 | 148 | " gps = [(E, E), (S, E), (E, S), (S, S)]\n",
|
149 | 149 | " gp = gps[random.randint(0, len(gps)-1)]\n",
|
150 | | - " self.grid[gp] = GOAL_VALUE\n",
| 150 | + " gy, gx = gp\n",
| 151 | + " self.grid[gy, gx] = GOAL_VALUE\n",
151 | 152 | " \n",
|
152 | 153 | " def visible(self, pos):\n",
|
153 | 154 | " y, x = pos\n",
|
|
237 | 238 | " def record_step(self):\n",
|
238 | 239 | " \"\"\"Add the current state to history for display later\"\"\"\n",
|
239 | 240 | " grid = np.array(self.grid.grid)\n",
|
240 | | - " grid[self.agent.pos] = self.agent.health * 0.5 # Agent marker faded by health\n",
| 241 | + " y, x = self.agent.pos\n",
| 242 | + " grid[y, x] = self.agent.health * 0.5 # Agent marker faded by health\n",
241 | 243 | " visible = np.array(self.grid.visible(self.agent.pos))\n",
|
242 | 244 | " self.history.append((grid, visible, self.agent.health))\n",
|
243 | 245 | " \n",
|
|
256 | 258 | " self.agent.act(action)\n",
|
257 | 259 | " \n",
|
258 | 260 | " # Get reward from where agent landed, add to agent health\n",
|
259 | | - " value = self.grid.grid[self.agent.pos]\n",
260 | | - " self.grid.grid[self.agent.pos] = 0\n",
| 261 | + " y, x = self.agent.pos\n",
| 262 | + " value = self.grid.grid[y, x]\n",
| 263 | + " self.grid.grid[y, x] = 0\n",
261 | 264 | " self.agent.health += value\n",
|
262 | 265 | " \n",
|
263 | 266 | " # Check if agent won (reached the goal) or lost (health reached 0)\n",
|
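The hunks above all apply the same pattern: the agent or goal position is unpacked into y, x before indexing the NumPy grid. A plausible motivation (an assumption, not stated in the commit) is that NumPy treats a list index differently from a tuple index, so unpacking makes single-cell access explicit however the position happens to be stored. A minimal sketch of the difference:

```python
import numpy as np

grid = np.zeros((5, 5))
pos = [2, 3]  # hypothetical position stored as a list rather than a tuple

# Unpacked (or tuple) indexing touches exactly one cell.
y, x = pos
grid[y, x] = 1.0   # same cell as grid[(2, 3)]

# A list index is fancy indexing: it selects whole rows 2 and 3 instead.
grid[pos] = 0.5    # overwrites every element of rows 2 and 3
```

With a genuine (y, x) tuple both forms select a single cell, so the change can also be read as a pure clarity fix.
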
|
707 | 710 | "metadata": {
|
708 | 711 | "collapsed": false
|
709 | 712 | },
|
710 | | - "outputs": [ ],
| 713 | + "outputs": [],
711 | 714 | "source": [
|
712 | 715 | "hidden_size = 50\n",
|
713 | 716 | "learning_rate = 1e-4\n",
|
|