# rainbow_beam_rider_del.py
num_seeds = 5
timesteps_total = 10_000_000
from collections import OrderedDict
var_env_configs = OrderedDict({
    'delay': [0] + [2**i for i in range(4)],
    'dummy_seed': [i for i in range(num_seeds)],
})

var_configs = OrderedDict({
    "env": var_env_configs,
})
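# The grid above sweeps the reward delay over [0, 1, 2, 4, 8] timesteps and repeats
# each setting for num_seeds = 5 dummy seeds, i.e. 5 * 5 = 25 configurations in
# total (see cartesian_product_configs at the end of the file).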
env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": 'beam_rider',  # "breakout",
            'obs_type': 'image',
            'frameskip': 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        'frame_skip': 4,
        'grayscale_obs': False,
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'seed': 0,
        # },
        # 'seed': 0, #seed
    },
}
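# A note on the wrapping (my reading of the config, not authoritative): the raw
# AtariEnv is created with frameskip=1 and the atari_preprocessing wrapper then
# applies frame_skip=4, so frame skipping happens only once, inside the wrapper.
# GymEnvWrapper-Atari is assumed here to be MDP Playground's wrapper that injects
# the 'delay' values from var_env_configs into the wrapped environment.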
algorithm = "DQN"
agent_config = {  # Taken from Ray tuned_examples
    'adam_epsilon': 0.00015,
    'buffer_size': 500000,
    'double_q': True,
    'dueling': True,
    'exploration_config': {
        'epsilon_timesteps': 200000,
        'final_epsilon': 0.01,
    },
    'final_prioritized_replay_beta': 1.0,
    'hiddens': [512],
    'learning_starts': 20000,
    'lr': 6.25e-05,
    # 'lr': 0.0001,
    # 'model': { 'dim': 42,
    #            'grayscale': True,
    #            'zero_mean': False},
    'n_step': 4,
    'noisy': False,
    'num_atoms': 51,
    'num_gpus': 0,
    "num_workers": 3,
    # "num_cpus_for_driver": 2,
    # 'gpu': False,  # deprecated
    'prioritized_replay': True,
    'prioritized_replay_alpha': 0.5,
    'prioritized_replay_beta_annealing_timesteps': 2000000,
    'rollout_fragment_length': 4,
    'timesteps_per_iteration': 10000,
    'target_network_update_freq': 2000,
    # 'target_network_update_freq': 500,
    'train_batch_size': 32,

    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}
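# These settings correspond to a Rainbow-style DQN: double Q-learning and dueling
# heads enabled, 4-step returns, a distributional head with 51 atoms, and
# prioritized replay (alpha=0.5, beta annealed to 1.0 over 2M timesteps); noisy
# networks are disabled in favour of epsilon-greedy exploration via exploration_config.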
model_config = {
    # "model": {
    #     "fcnet_hiddens": [256, 256],
    #     "fcnet_activation": "tanh",
    #     "use_lstm": False,
    #     "max_seq_len": 20,
    #     "lstm_cell_size": 256,
    #     "lstm_use_prev_action_reward": False,
    # },
}
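# model_config is left empty, so RLlib should fall back to its default model for
# image observations (its built-in vision/conv network); the commented-out keys
# show the fields that could be overridden here.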
from ray import tune
eval_config = {
    "evaluation_interval": None,  # evaluate every this many training iterations; None disables periodic evaluation
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            # Hack: marks evaluation mode so the Ray on_episode_end() callback can
            # tell eval rollouts apart from training rollouts and write eval stats
            "dummy_eval": True,
            'transition_noise': 0 if ("state_space_type" in env_config["env_config"]
                                      and env_config["env_config"]["state_space_type"] == "discrete")
                                   else tune.function(lambda a: a.normal(0, 0)),
            'reward_noise': tune.function(lambda a: a.normal(0, 0)),
            'action_loss_weight': 0.0,
        },
    },
}
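# Evaluation is intended to be noise-free and deterministic: explore is False and
# the noise callables sample from N(0, 0), i.e. lambda a: a.normal(0, 0) always
# returns 0.0 for a NumPy random state `a`, so no transition or reward noise is
# added during evaluation episodes.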
value_tuples = []
for config_type, config_dict in var_configs.items():
    for key in config_dict:
        assert isinstance(var_configs[config_type][key], list), \
            "var_config should be a dict of dicts with lists as the leaf values " \
            "to allow each configuration option to take multiple possible values"
        value_tuples.append(var_configs[config_type][key])

import itertools
cartesian_product_configs = list(itertools.product(*value_tuples))
print("Total number of configs. to run:", len(cartesian_product_configs))
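# A minimal sketch (not part of the original config) of how each tuple in
# cartesian_product_configs lines up with the (config_type, key) pairs being
# varied; config_keys is an illustrative name introduced here. Guarded so it only
# runs when this file is executed directly, not when the config is loaded by a runner.
if __name__ == "__main__":
    config_keys = [
        (config_type, key)
        for config_type, config_dict in var_configs.items()
        for key in config_dict
    ]
    for values in cartesian_product_configs[:3]:
        print(dict(zip(config_keys, values)))
    # Expected output starts with:
    # {('env', 'delay'): 0, ('env', 'dummy_seed'): 0}
    # {('env', 'delay'): 0, ('env', 'dummy_seed'): 1}
    # {('env', 'delay'): 0, ('env', 'dummy_seed'): 2}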