# Define gridworld class
import numpy as np
import random
from operator import add


class Gridworld:
    def __init__(self, gridworld_length=10, gridworld_width=-1, num_obstacles=-1,
                 collisionReward=-1, destinationReward=10, defaultReward=0, outOfBoundsReward=-1,
                 failChance=0.1, gamma=0.9):
        self.gridworld_length = gridworld_length
        if gridworld_width == -1:  # if no width is specified, make it a square
            self.gridworld_width = gridworld_length
        else:
            self.gridworld_width = gridworld_width
        self.grid = np.zeros((self.gridworld_length, self.gridworld_width))
        self.ds_actions = {"u": [-1, 0], "r": [0, 1], "d": [1, 0], "l": [0, -1],
                           "tr": [0, 0], "tl": [0, 0]}  # turn right/left
        # ordered action names; the first four are move directions, indexed by orientation
        self.actions = list(self.ds_actions.keys())
        if num_obstacles == -1:  # if no count is specified, use ~ sqrt(length*width) obstacles
            self.num_obstacles = int(np.floor(np.sqrt(gridworld_length * gridworld_width)))
        else:
            self.num_obstacles = num_obstacles
        self.source, self.destination, self.obstacle_positions = self.initiate_gridworld()
        self.num_orientations = 4
        # Initialize 1 of 4 orientations for the agent to face
        orientation = random.randint(0, self.num_orientations - 1)
        # State is [row, col, orientation] plus the markers of the 3 cells ahead
        self.state = self.source + [orientation]
        self.state = self.state + self.getSurroundingMarkers()
        self.collisionReward = collisionReward
        self.destinationReward = destinationReward
        self.defaultReward = defaultReward
        self.failChance = failChance
        self.gamma = gamma
        self.outOfBoundsReward = outOfBoundsReward

    def getCoords(self):
        return self.state[:2]

    def getOrientation(self):
        return self.state[2]

    def randomCoords(self):
        return [random.randint(0, self.gridworld_length - 1),
                random.randint(0, self.gridworld_width - 1)]

    def getNumStates(self):
        # positions * orientations * 3 possible markers for each of the 3 observed cells
        return self.gridworld_length * self.gridworld_width * self.num_orientations * (3 ** 3)

    def getNumActions(self):
        return len(self.actions)

    def reset_position(self):
        pos = self.randomCoords()
        while pos == self.destination or (pos in self.obstacle_positions):
            pos = self.randomCoords()
        orientation = random.randint(0, self.num_orientations - 1)
        self.state = pos + [orientation]
        self.state = self.state + self.getSurroundingMarkers()

    def reset_state(self, s):
        self.state = s

    def reset_destination(self):
        pos = self.randomCoords()
        while pos == self.state[:2] or (pos in self.obstacle_positions):
            pos = self.randomCoords()
        self.destination = pos

    def initiate_gridworld(self):
        # add a random source and destination to the gridworld
        source = self.randomCoords()
        destination = self.randomCoords()
        while destination == source:
            destination = self.randomCoords()
        # add random obstacles, making sure they do not overlap the source,
        # the destination, or each other
        obstacle_positions = []
        while len(obstacle_positions) < self.num_obstacles:
            position = self.randomCoords()
            if position != source and position != destination and position not in obstacle_positions:
                obstacle_positions.append(position)
        return source, destination, obstacle_positions

    def getMarker(self, posn):
        # 2 = destination, 1 = obstacle, 0 = empty (or out of bounds)
        if posn == self.destination:
            return 2
        if posn in self.obstacle_positions:
            return 1
        return 0

    # get the markers of the 3 cells in front of the agent
    def getSurroundingMarkers(self):
        markers = []
        x, y = self.ds_actions[self.actions[self.getOrientation()]]
        if x == 0:  # facing left/right: the 3 cells in the adjacent column
            steps = [[-1, y], [0, y], [1, y]]
        else:  # facing up/down: the 3 cells in the adjacent row
            steps = [[x, -1], [x, 0], [x, 1]]
        for step in steps:
            adj_posn = list(map(add, self.getCoords(), step))
            markers.append(self.getMarker(adj_posn))
        return markers

    def turn(self, a):
        if a == 'tr':
            self.state[2] = (self.state[2] + 1) % 4
        elif a == 'tl':
            self.state[2] = (self.state[2] - 1) % 4
        # refresh the observed markers for the new orientation
        self.state[3:] = self.getSurroundingMarkers()

    def takeAction(self, a, s=None):
        if s is not None:
            self.state = s
        # take the action with probability 1 - failChance; stay put with probability failChance
        if random.random() < 1 - self.failChance:
            new_state = list(map(add, self.getCoords(), self.ds_actions[a]))
            # if turning in place
            if self.getCoords() == new_state:
                self.turn(a)
                return self.defaultReward
            # if moving off the grid
            if new_state[0] < 0 or new_state[0] >= self.gridworld_length or \
                    new_state[1] < 0 or new_state[1] >= self.gridworld_width:
                return self.outOfBoundsReward
            # if colliding with an obstacle
            if new_state in self.obstacle_positions:
                return self.collisionReward
            self.state[:2] = new_state
            # refresh the observed markers for the new position
            self.state[3:] = self.getSurroundingMarkers()
            if new_state == self.destination:
                return self.destinationReward
        # no action taken, or step taken without collision/reaching destination
        return self.defaultReward

    def print_gridworld(self):
        # A = agent, D = destination, O = obstacle, - = empty cell
        for row in range(self.gridworld_length):
            for col in range(self.gridworld_width):
                if [row, col] in self.obstacle_positions:
                    print('O', end=' ')
                elif [row, col] == self.destination:
                    print('D', end=' ')
                elif [row, col] == self.getCoords():
                    print('A', end=' ')
                else:
                    print('-', end=' ')
            print()

    def gridworld_to_arr(self):
        # same encoding as print_gridworld, returned as a numpy array of single characters
        char_grid = np.zeros([self.gridworld_length, self.gridworld_width]).astype('<U1')
        for row in range(self.gridworld_length):
            for col in range(self.gridworld_width):
                if [row, col] in self.obstacle_positions:
                    char_grid[row, col] = 'O'
                elif [row, col] == self.destination:
                    char_grid[row, col] = 'D'
                elif [row, col] == self.getCoords():
                    char_grid[row, col] = 'A'
                else:
                    char_grid[row, col] = '-'
        return char_grid

    def state_to_ind(self, s):
        """
        Converts a state in list format to an index for indexing into a Q value matrix
        """
        # a tuple selects a single element of a multi-dimensional array,
        # whereas a list would trigger fancy indexing
        return tuple(s)

    def action_to_ind(self, a):
        """
        Converts an action in string format to an index for indexing into a Q value matrix
        """
        return self.actions.index(a)
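

# Minimal usage sketch (an illustrative addition, not from the original repo):
# builds a small world, prints it, and takes a few random actions, using the
# index helpers the way a tabular Q-learning loop might.
if __name__ == "__main__":
    world = Gridworld(gridworld_length=5)
    world.print_gridworld()

    # tabular Q values: one axis per state component plus one for the action
    Q = np.zeros((world.gridworld_length, world.gridworld_width,
                  world.num_orientations, 3, 3, 3, world.getNumActions()))

    for _ in range(5):
        a = random.choice(world.actions)
        s_ind = world.state_to_ind(world.state)  # index of the state acted from
        r = world.takeAction(a)
        # accumulate the observed reward for this state-action pair
        Q[s_ind + (world.action_to_ind(a),)] += r
        print(f"action={a} reward={r}")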