147 | 147 | " E = self.grid_size + VISIBLE_RADIUS - 1\n",
148 | 148 | " gps = [(E, E), (S, E), (E, S), (S, S)]\n",
149 | 149 | " gp = gps[random.randint(0, len(gps)-1)]\n",
150 |     | - " gy, gx = gp\n",
151 |     | - " self.grid[gy, gx] = GOAL_VALUE\n",
    | 150 | + " self.grid[gp] = GOAL_VALUE\n",
152 | 151 | " \n",
153 | 152 | " def visible(self, pos):\n",
154 | 153 | " y, x = pos\n",

238 | 237 | " def record_step(self):\n",
239 | 238 | " \"\"\"Add the current state to history for display later\"\"\"\n",
240 | 239 | " grid = np.array(self.grid.grid)\n",
241 |     | - " y, x = self.agent.pos\n",
242 |     | - " grid[y, x] = self.agent.health * 0.5 # Agent marker faded by health\n",
    | 240 | + " grid[self.agent.pos] = self.agent.health * 0.5 # Agent marker faded by health\n",
243 | 241 | " visible = np.array(self.grid.visible(self.agent.pos))\n",
244 | 242 | " self.history.append((grid, visible, self.agent.health))\n",
245 | 243 | " \n",

258 | 256 | " self.agent.act(action)\n",
259 | 257 | " \n",
260 | 258 | " # Get reward from where agent landed, add to agent health\n",
261 |     | - " y, x = self.agent.pos\n",
262 |     | - " value = self.grid.grid[y, x]\n",
263 |     | - " self.grid.grid[y, x] = 0\n",
    | 259 | + " value = self.grid.grid[self.agent.pos]\n",
    | 260 | + " self.grid.grid[self.agent.pos] = 0\n",
264 | 261 | " self.agent.health += value\n",
265 | 262 | " \n",
266 | 263 | " # Check if agent won (reached the goal) or lost (health reached 0)\n",

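The two hunks above apply the same refactor: instead of unpacking the (y, x) position and indexing with two integers, the position tuple is passed to the array index directly. NumPy treats a tuple index as a multi-dimensional index, so both forms address the same single element. A minimal standalone sketch of the idea, assuming the grid is a NumPy array (as the np.array(self.grid.grid) call suggests):

    import numpy as np

    grid = np.zeros((10, 10))
    pos = (3, 7)            # (y, x) tuple, like agent.pos in the notebook

    # Old style: unpack, then index with two integers
    y, x = pos
    grid[y, x] = 1.0

    # New style: index with the tuple directly -- same element
    grid[pos] = 2.0
    assert grid[3, 7] == 2.0

Note that this equivalence holds only for a tuple index; indexing with a list (e.g. grid[[3, 7]]) would trigger fancy indexing and select whole rows instead of one element.
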
710 | 707 | "metadata": {
711 | 708 | "collapsed": false
712 | 709 | },
713 |     | - "outputs": [],
    | 710 | + "outputs": [ ],
714 | 711 | "source": [
715 | 712 | "hidden_size = 50\n",
716 | 713 | "learning_rate = 1e-4\n",