"""
Reinforcement learning maze example.
This script is the main part which controls the update method of this example using q-learning algorithm.
The RL algorithm (Q-learning) is in RL_agent.py.
The environment is presented in maze_env.py.
Red rectangle: explorer.
Black rectangles: hells [reward = -1].
Yellow bin circle: paradise [reward = +1].
All other states: ground [reward = 0].
"""
from maze_env import Maze
from RL_agent import QLearningTable
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
episode_count = 50  # Number of episodes to run the experiment
episodes = range(episode_count)
movements = []  # Number of movements made in each episode
rewards = []    # Reward gained in each episode


def run_experiment():
    """
    Update the position of the explorer in the Maze environment based on
    the actions the agent chooses, one episode at a time.
    """
    for episode in episodes:
        print("Episode %s/%s." % (episode + 1, episode_count))
        # Initial observation
        observation = env.reset()
        moves = 0
        while True:
            # Refresh the environment
            env.render()
            # The agent chooses an action based on the current observation.
            # The observation is converted to str so it can be used as an
            # index into the agent's DataFrame.
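            # Note: choose_action is assumed to follow an epsilon-greedy policy
            # over the Q-table (mostly greedy, occasionally random); the actual
            # policy is defined in RL_agent.py.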
            action = q_learning_agent.choose_action(str(observation))
            # The environment executes the action and returns the next
            # observation, the reward, and whether the episode is done.
            observation_, reward, done = env.get_state_reward(action)
            moves += 1
            # The agent learns from this transition, i.e. updates the
            # Q value for the (state, action, reward, next state) tuple.
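            # The update is assumed to be the standard tabular Q-learning rule
            # (implemented in RL_agent.py):
            #   Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))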
            q_learning_agent.learn(str(observation), action, reward, str(observation_))
            # Move on to the next observation
            observation = observation_
            # Break out of the while loop at the end of this episode
            if done:
                movements.append(moves)  # Track the number of movements in this episode
                rewards.append(reward)   # Track the reward gained in this episode
                print("Reward: {0}, Moves: {1}".format(reward, moves))
                break
    # End of game
    print('game over!')
    # Show the results
    plot_reward_movements()

def plot_reward_movements():
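    """
    Plot the number of movements and the terminal reward for each episode,
    then save the figure to rewards_movements_q_learn.png and display it.
    """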
    plt.figure()

    plt.subplot(2, 1, 1)
    plt.plot(episodes, movements)
    plt.xlabel("Episode")
    plt.ylabel("# Movements")

    plt.subplot(2, 1, 2)
    plt.step(episodes, rewards)
    plt.xlabel("Episode")
    plt.ylabel("Reward")

    plt.savefig("rewards_movements_q_learn.png")
    plt.show()


if __name__ == "__main__":
    # Create the maze environment
    env = Maze()
    # Create the Q-learning agent
    q_learning_agent = QLearningTable(actions=list(range(env.n_actions)))
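    # The learning rate, discount factor, and epsilon-greedy parameters are
    # assumed to come from QLearningTable's defaults defined in RL_agent.py.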
    # Schedule run_experiment() to be called once, 10 ms after the Tk event loop starts.
    env.window.after(10, run_experiment)
    # Start the Tk main loop: it waits for events and processes them
    # until the window is closed.
    env.window.mainloop()