-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarvin.py
executable file
·51 lines (34 loc) · 918 Bytes
/
marvin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!./venv/bin/python3.7
import sys
import gym
import numpy as np
from utils.NNBackProp import *
# Module-level environment handle. It is created (and reset once) at import
# time, exactly as before, so the name `env` stays available at module scope.
env = gym.make('Marvin-v0')
env.reset()

if __name__ == "__main__":
    # Script entry point: run 50 rendered episodes of 'Marvin-v0', choosing
    # each action with nn.predict() and calling nn.nn_backprop() on the
    # observations and per-step "losses" collected during the episode.
    #
    # NOTE(review): the indentation of this script was reconstructed; the
    # backprop call is assumed to run once per episode (after the step loop),
    # since the buffers it consumes are re-created per episode -- confirm.
    nn = NNBackProp()
    nn.init_nn()
    if "-r" in sys.argv:
        # "-r" on the command line loads weights from a saved model file.
        nn.load_weights("models/model_1000.pickle")

    try:
        for i_episode in range(50):
            losses = []
            observation_arr = []
            cum_reward = 0
            done = False
            print("Epoch #", i_episode)
            observation = env.reset()

            while not done:
                env.render()
                # Flatten the observation to the 24-element vector that is
                # stored and passed to nn.predict().
                observation = np.array(observation).reshape((24,))
                observation_arr.append(observation)
                action = nn.predict(observation).reshape((4,))
                observation, reward, done, _ = env.step(action)
                # NOTE(review): despite its name this is not a running sum;
                # it is `reward - previous value`. Kept as-is because the
                # signal nn_backprop() expects is not visible here -- confirm
                # whether `cum_reward += reward` was intended.
                cum_reward = reward - cum_reward
                losses.append(cum_reward)

            # One loss is appended per env.step(), so len(losses) is the real
            # number of timesteps (the episode index was printed before).
            print("Episode finished after {} timesteps".format(len(losses)))
            nn.nn_backprop(observation_arr, losses)
    finally:
        # Always release the environment, even when an episode raises or the
        # run is interrupted with Ctrl-C.
        env.close()