# Value_Iteration.jl
using Pkg
pkg"add https://github.com/zsunberg/DMUStudent.jl"
pkg"add POMDPs"
pkg"add POMDPTools"
using DMUStudent.HW2
using POMDPs: states, actions, convert_s, stateindex
using POMDPTools: ordered_states
using Random
using LinearAlgebra
using SparseArrays
using MKL
############################
# Value Iteration Functions
############################
"""
    value_iteration_generic(m, gamma=0.95, tolerance=1e-8; max_iterations=typemax(Int))

Run value iteration on the MDP `m` and return the resulting state-value vector.

Each sweep applies the Bellman optimality backup
`V'[s] = max over a of (R[a][s] + gamma * (T[a] * V)[s])`, where `T` and `R` are the
action-indexed collections returned by `transition_matrices(m)` and
`reward_vectors(m)`. Sweeps stop once the largest absolute change between two
consecutive iterates is at most `tolerance`.

# Arguments
- `m`: problem accepted by `states`, `actions`, `transition_matrices` and
  `reward_vectors`.
- `gamma`: discount factor applied to the expected next-state value.
- `tolerance`: convergence threshold on the max-norm change between sweeps.

# Keywords
- `max_iterations`: upper bound on the number of sweeps. The default imposes no
  practical limit, i.e. the function iterates until convergence.

# Returns
A `Vector{Float64}` with one entry per state.
NOTE(review): assumes the rows of every `T[a]` and the entries of every `R[a]` use the
same state ordering - confirm against the package that provides them.

# Throws
- `ArgumentError` if `m` has no actions.
"""
function value_iteration_generic(m, gamma=0.95, tolerance=1e-8; max_iterations=typemax(Int))
    # Extract the MDP description.
    A = actions(m)
    T = transition_matrices(m)
    R = reward_vectors(m)
    num_states = length(states(m))

    isempty(A) && throw(ArgumentError("value iteration needs at least one action"))

    # Start from an all-zero guess. The iteration converges from any starting point
    # when gamma < 1, so no random initialisation is needed and the result does not
    # depend on a particular random-number stream.
    V = zeros(num_states)
    Vprime = zeros(num_states)
    backup = zeros(num_states)  # scratch space for T[a] * V

    residual = Inf  # guarantees that at least one sweep runs
    iteration = 0
    while residual > tolerance && iteration < max_iterations
        # Bellman backup: running element-wise maximum over all actions.
        fill!(Vprime, -Inf)
        for a in A
            reward = R[a]
            mul!(backup, T[a], V)
            Vprime .= max.(Vprime, reward .+ gamma .* backup)
        end

        # Max-norm distance between consecutive iterates (0.0 for an empty state set).
        residual = maximum((abs(new - old) for (new, old) in zip(Vprime, V)); init=0.0)

        # The fresh iterate becomes the current one; the old buffer is reused.
        V, Vprime = Vprime, V
        iteration += 1
    end
    return V
end
"""
    value_iteration_ACAS(m, gamma=0.99, epsilon=1e-8; max_iterations=typemax(Int))

Run value iteration on the MDP `m` using the transition matrices obtained from
`transition_matrices(m, sparse=true)` and return the resulting state-value vector.

Each sweep applies the Bellman optimality backup
`V'[s] = max over a of (R[a][s] + gamma * (T[a] * V)[s])` over every action returned by
`actions(m)`. Sweeps stop once the largest absolute change between two consecutive
iterates is at most `epsilon`. All work vectors are allocated once and reused, so a
sweep performs no full-length allocations.

# Arguments
- `m`: problem accepted by `states`, `actions`, `transition_matrices` and
  `reward_vectors`.
- `gamma`: discount factor applied to the expected next-state value.
- `epsilon`: convergence threshold on the max-norm change between sweeps.

# Keywords
- `max_iterations`: upper bound on the number of sweeps. The default imposes no
  practical limit, i.e. the function iterates until convergence.

# Returns
A `Vector{Float64}` with one entry per state.
NOTE(review): assumes the rows of every `T[a]` and the entries of every `R[a]` use the
same state ordering - confirm against the package that provides them.

# Throws
- `ArgumentError` if `m` has no actions.
"""
function value_iteration_ACAS(m, gamma=0.99, epsilon=1e-8; max_iterations=typemax(Int))
    # Extract the MDP description. The matrices are requested in sparse form, so they
    # are used as returned instead of being converted (and copied) again.
    A = actions(m)
    T = transition_matrices(m, sparse=true)
    R = reward_vectors(m)
    num_states = length(states(m))

    isempty(A) && throw(ArgumentError("value iteration needs at least one action"))

    # Start from an all-zero guess. The iteration converges from any starting point
    # when gamma < 1, so no random initialisation is needed.
    V = zeros(num_states)
    Vprime = zeros(num_states)
    backup = zeros(num_states)  # scratch space for T[a] * V

    residual = Inf  # guarantees that at least one sweep runs
    iteration = 0
    while residual > epsilon && iteration < max_iterations
        # Bellman operator: running element-wise maximum over every action, not just
        # a fixed number of them.
        fill!(Vprime, -Inf)
        for a in A
            reward = R[a]
            mul!(backup, T[a], V)
            Vprime .= max.(Vprime, reward .+ gamma .* backup)
        end

        # Max-norm distance between consecutive iterates (0.0 for an empty state set).
        residual = maximum((abs(new - old) for (new, old) in zip(Vprime, V)); init=0.0)

        # The fresh iterate becomes the current one; the old buffer is reused.
        V, Vprime = Vprime, V
        iteration += 1
    end
    return V
end
############
# Question 3
############
# Solution: run value iteration on `grid_world` with the function's default discount
# factor and tolerance, then display the rendering with the values passed as `color`.
# `grid_world` and `render` are not defined in this file; presumably they come from
# DMUStudent.HW2 - confirm.
V = value_iteration_generic(grid_world)
display(render(grid_world, color=V))
############
# Question 4
############
# Solution and Evaluation: solve the ACAS problem built with argument 15 and hand the
# value vector to `HW2.evaluate`.
# Note: the global `V` is rebound here, so the grid-world values from Question 3 are
# no longer reachable afterwards.
# NOTE(review): the string passed to `evaluate` looks like an e-mail-obfuscation
# placeholder left by a web scrape rather than a real address - confirm and restore
# the intended value before running.
V = value_iteration_ACAS(UnresponsiveACASMDP(15))
HW2.evaluate(V, "[email protected]")