-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaze_env.py
177 lines (150 loc) · 5.57 KB
/
maze_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
Created on Tue April 24 21:07 2018
@author: hanxy
"""
import numpy as np
import tkinter as tk
import time
# Side length of one grid cell, in pixels.
UNIT = 40
# Half the side length of a marker square (agent, terminal, hell), in pixels.
# Markers are drawn from centre - SQUAR to centre + SQUAR, i.e. 3/4 of a cell.
SQUAR = 3 * UNIT / 8
def create_points(point, canvas, fill):
    """Draw one square marker for a maze cell and return its canvas item id.

    Args:
        point: numpy array ``[x, y]`` holding the cell's grid indices.
        canvas: object providing ``create_rectangle`` (a ``tk.Canvas``).
        fill: fill colour passed straight through to ``create_rectangle``.

    Returns:
        Whatever ``canvas.create_rectangle`` returns (the new item's id).

    Raises:
        ValueError: if ``point`` is not a single two-element coordinate.
    """
    # Pixel centre of the cell: half a cell in, plus one cell per index.
    rect_center = UNIT / 2 + point * UNIT
    shape = np.shape(rect_center)
    # Validate explicitly instead of with ``assert`` (asserts vanish under
    # ``python -O``). Anything other than one [x, y] pair used to fall through
    # without ever creating a rectangle, so reject it up front.
    if shape != (2,):
        raise ValueError(
            'only build 2d maze: expected one [x, y] point, '
            'got shape {0}'.format(shape)
        )
    return canvas.create_rectangle(
        rect_center[0] - SQUAR, rect_center[1] - SQUAR,
        rect_center[0] + SQUAR, rect_center[1] + SQUAR,
        fill=fill
    )
def transfer_coordinate(rect):
    """Convert canvas rectangle coordinates back to integer grid indices.

    Args:
        rect: indexable ``[x0, y0, x1, y1]`` in pixels, as returned by
            ``Canvas.coords`` for a marker rectangle.

    Returns:
        Integer numpy array ``[x, y]`` with the cell indices of the
        rectangle's centre.
    """
    center_x = (rect[0] + rect[2]) / 2
    center_y = (rect[1] + rect[3]) / 2
    # Inverse of the centre formula used when drawing: centre = UNIT/2 + i*UNIT.
    grid_position = np.array([center_x, center_y]) / UNIT - 0.5
    return np.array(grid_position, dtype=int)
class Maze(tk.Tk, object):
    """Tkinter grid-world maze with a reset/step/render interface.

    Cells are addressed as ``[x, y]`` grid indices: ``x`` counts columns from
    the left and ``y`` counts rows from the top. The agent is a red square,
    the terminal cell is yellow and hell cells are black.

    Attributes set by ``__init__``:
        action_space: key characters ``['w', 's', 'a', 'd']``; an action is
            the index of its character (0 up, 1 down, 2 left, 3 right).
        n_actions: number of actions.
        n_features: length of an un-encoded state (2).
        col, row: grid width and height in cells.
        origin, terminal, hell: start cell, goal cell and trap cells.
        n_hells: number of trap cells.
        is_encode: when true, states are returned as a single integer
            (see ``_encode``) instead of an ``[x, y]`` array.
    """

    def __init__(self, col=8, row=8,
                 hell=np.array([[6, 1], [1, 6], [5, 6], [6, 5]]),
                 origin=np.array([0, 0]),
                 terminal=np.array([7, 7])):
        """Create the window and draw the maze.

        Args:
            col: minimum number of columns (cells along x).
            row: minimum number of rows (cells along y).
            hell: array-like of ``[x, y]`` trap cells, one per row.
            origin: ``[x, y]`` start cell of the agent.
            terminal: ``[x, y]`` goal cell.

        The grid is enlarged beyond ``col``/``row`` if needed so that every
        given cell lies inside it.
        """
        super(Maze, self).__init__()
        self.action_space = ['w', 's', 'a', 'd']
        self.n_actions = len(self.action_space)
        self.n_features = 2
        self.title('maze')
        # Work on private copies so the shared default arrays (and arrays
        # owned by the caller) are never aliased by this instance; this also
        # lets callers pass plain lists.
        hell = np.array(hell)
        origin = np.array(origin)
        terminal = np.array(terminal)
        # col and row for maze
        self.col, self.row = self._grid_extent(col, row, hell,
                                               origin, terminal)
        self.origin = origin  # origin coordinate
        self.terminal = terminal  # terminal coordinate
        self.hell = hell  # hell coordinate
        self.n_hells = np.shape(self.hell)[0]  # number of hells
        # Tk geometry strings are "<width>x<height>": columns span the width.
        self.geometry('{0}x{1}'.format(self.col * UNIT,
                                       self.row * UNIT))
        self.is_encode = True
        self._build_maze()

    @staticmethod
    def _grid_extent(col, row, hell, origin, terminal):
        """Return ``(col, row)`` large enough to contain every given cell.

        ``col`` must exceed every x index (element 0) and ``row`` every y
        index (element 1) of the origin, terminal and hell cells.
        """
        x_needs = [col, origin[0] + 1, terminal[0] + 1]
        y_needs = [row, origin[1] + 1, terminal[1] + 1]
        # np.max raises on an empty array, so only consult hell when present.
        if np.shape(hell)[0]:
            hell_max = np.max(hell, axis=0)
            x_needs.append(hell_max[0] + 1)
            y_needs.append(hell_max[1] + 1)
        return int(np.max(x_needs)), int(np.max(y_needs))

    def _build_maze(self):
        """Create the canvas, draw the grid lines and place all markers."""
        width = self.col * UNIT
        height = self.row * UNIT
        self.canvas = tk.Canvas(self, bg='white',
                                height=height,
                                width=width)
        # create grids: one vertical line per column, one horizontal per row
        for x in range(0, width, UNIT):
            self.canvas.create_line(x, 0, x, height)
        for y in range(0, height, UNIT):
            self.canvas.create_line(0, y, width, y)
        # create origin (red)
        self.rect = create_points(self.origin, self.canvas, 'red')
        # create terminal (yellow)
        self.terminal_point = create_points(self.terminal, self.canvas,
                                            'yellow')
        # create hell (black); keep the canvas item id of every trap
        self.hell_points = np.array(
            [create_points(cell, self.canvas, 'black') for cell in self.hell],
            dtype=int
        )
        # Keyboard control is disabled by default; enable it with:
        # self.bind("<Key>", self.on_key_pressed)
        # pack all
        self.canvas.pack()

    def reset(self):
        """Move the agent back to the origin and return the start state.

        Refreshes the window and pauses for half a second first, then
        replaces the agent's rectangle with a new one at the origin.
        """
        self.update()
        time.sleep(0.5)
        self.canvas.delete(self.rect)
        self.rect = create_points(self.origin, self.canvas, 'red')
        # return observation
        return self._observe(self.origin)

    def step(self, action):
        """Apply one action and return ``(state, reward, done)``.

        Args:
            action: 0 up, 1 down, 2 left, 3 right. A move that would leave
                the grid, or any other action value, leaves the agent in
                place.

        Returns:
            Tuple ``(state, reward, done)``. Reaching the terminal cell gives
            reward 1000 and ``done=True``; entering a hell cell gives reward
            -10 and ``done=True``; every other step gives reward 0 and
            ``done=False``. ``state`` is encoded when ``is_encode`` is true.
        """
        # Top-left corner of the agent square, in pixels. The comparisons
        # below test it against cell boundaries to keep the agent in the grid.
        left, top = self.canvas.coords(self.rect)[:2]
        dx = dy = 0
        if action == 0:  # up 'w'
            if top > UNIT:
                dy = -UNIT
        elif action == 1:  # down 's'
            if top < (self.row - 1) * UNIT:
                dy = UNIT
        elif action == 2:  # left 'a'
            if left > UNIT:
                dx = -UNIT
        elif action == 3:  # right 'd'
            if left < (self.col - 1) * UNIT:
                dx = UNIT
        # move agent
        self.canvas.move(self.rect, dx, dy)
        # next state
        s_ = self.canvas.coords(self.rect)
        # reward function for Reinforcement Learning
        if s_ == self.canvas.coords(self.terminal_point):
            # terminal
            return self._observe(self.terminal), 1000, True
        state = transfer_coordinate(s_)
        for hell_id in self.hell_points:
            if s_ == self.canvas.coords(int(hell_id)):
                # hell
                return self._observe(state), -10, True
        # normal
        return self._observe(state), 0, False

    def _observe(self, state):
        """Return ``state`` encoded or raw, depending on ``is_encode``."""
        if self.is_encode:
            return self._encode(state)
        return state

    def render(self):
        """Pause briefly and refresh the window."""
        time.sleep(0.01)
        self.update()

    # key pressed call back function
    def on_key_pressed(self, event):
        """Handle a key event: step on w/s/a/d, print the state, reset if done."""
        char = event.char
        self.render()
        if char in self.action_space:
            action = self.action_space.index(char)
            s, r, done = self.step(action)
            print(s)
            if done:
                self.reset()

    # encode state
    def _encode(self, state):
        """Flatten an ``[x, y]`` state to the single index ``x + y * col``."""
        return state[0] + state[1] * self.col
if __name__ == '__main__':
    # Manual run: build the default maze window and hand control to Tk's
    # event loop until the window is closed.
    maze = Maze()
    maze.mainloop()