"""
Using NEAT for reinforcement learning.
The detail for NEAT can be find in : http://nn.cs.utexas.edu/downloads/papers/stanley.cec02.pdf
Visit my tutorial website for more: https://mofanpy.com/tutorials/
"""
import neat
import numpy as np
import gym
import visualize  # plotting helper module shipped with the neat-python examples
GAME = 'CartPole-v0'
env = gym.make(GAME).unwrapped
CONFIG = "./config"
EP_STEP = 300 # maximum episode steps
GENERATION_EP = 10 # fitness is the minimum reward over this many episodes
TRAINING = False # True: train from scratch; False: test the saved checkpoint below
CHECKPOINT = 9 # test on this checkpoint
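
# CONFIG points to a neat-python configuration file that must sit next to this
# script. The original repo ships its own; a minimal sketch of the key entries
# (illustrative values, not the exact file from the repo) looks like:
#
#   [NEAT]
#   fitness_criterion     = max
#   fitness_threshold     = 1.0
#   pop_size              = 150
#   reset_on_extinction   = False
#
#   [DefaultGenome]
#   num_inputs   = 4   # CartPole observation: x, x_dot, theta, theta_dot
#   num_hidden   = 0
#   num_outputs  = 2   # one value per discrete action
#   ... (activation, bias, weight and connection options omitted here)
#
#   [DefaultSpeciesSet]
#   compatibility_threshold = 3.0
#
#   [DefaultStagnation]
#   species_fitness_func = max
#   max_stagnation       = 20
#
#   [DefaultReproduction]
#   elitism            = 2
#   survival_threshold = 0.2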
def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        ep_r = []
        for ep in range(GENERATION_EP):  # run several episodes so one lucky episode does not inflate fitness
            accumulative_r = 0.          # the longer the pole stays balanced, the greater the episode reward
            observation = env.reset()
            for t in range(EP_STEP):
                action_values = net.activate(observation)
                action = np.argmax(action_values)
                observation_, reward, done, _ = env.step(action)
                accumulative_r += reward
                if done:
                    break
                observation = observation_
            ep_r.append(accumulative_r)
        genome.fitness = np.min(ep_r) / float(EP_STEP)  # fitness is the worst episode reward, normalized to [0, 1]
def run():
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation, CONFIG)
    pop = neat.Population(config)
    # record training statistics
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    pop.add_reporter(neat.Checkpointer(5))  # save a checkpoint every 5 generations
    pop.run(eval_genomes, 10)  # train for 10 generations
    # visualize training
    visualize.plot_stats(stats, ylog=False, view=True)
    visualize.plot_species(stats, view=True)
def evaluation():
    p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-%i' % CHECKPOINT)
    winner = p.run(eval_genomes, 1)  # run one generation to find the winner in the restored population
    # show the winner's network
    node_names = {-1: 'In0', -2: 'In1', -3: 'In2', -4: 'In3', 0: 'act1', 1: 'act2'}
    visualize.draw_net(p.config, winner, True, node_names=node_names)
    net = neat.nn.FeedForwardNetwork.create(winner, p.config)
    while True:  # replay the winner until interrupted
        s = env.reset()
        while True:
            env.render()
            a = np.argmax(net.activate(s))
            s, r, done, _ = env.step(a)
            if done:
                break
if __name__ == '__main__':
    if TRAINING:
        run()
    else:
        evaluation()
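
# Note: this script uses the classic Gym (< 0.26) API, where env.reset() returns
# just the observation and env.step() returns a 4-tuple. If you run it on
# gym >= 0.26 or gymnasium, the equivalent calls would be (a sketch, not part of
# the original script):
#
#   observation, info = env.reset()
#   observation_, reward, terminated, truncated, info = env.step(action)
#   done = terminated or truncated
#
# and rendering is requested at creation time via gym.make(GAME, render_mode='human').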