
Commit 94f417c

# 323 A3C on Atari image_transforms sh_quant
1 parent f3f2e8f commit 94f417c

9 files changed, +619 −8 lines changed
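Each new file defines a grid of variable environment configs that an experiment runner presumably expands into individual Tune trials. The runner itself is not part of this commit, so the following is only a minimal sketch of that expansion, assuming the Cartesian product of the `var_env_configs` value lists is what the `itertools` import is for:

import itertools
from collections import OrderedDict

var_env_configs = OrderedDict(
    {
        "image_transforms": ["shift"],
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": list(range(5)),
    }
)

# Cartesian product over the value lists: 1 * 4 * 5 = 20 trial configs per game.
grid = [
    dict(zip(var_env_configs.keys(), values))
    for values in itertools.product(*var_env_configs.values())
]
print(len(grid))  # 20
print(grid[0])    # {'image_transforms': 'shift', 'image_sh_quant': 2, 'dummy_seed': 0}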
Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
import itertools
from collections import OrderedDict

from ray import tune

num_seeds = 5
timesteps_total = 10_000_000

var_env_configs = OrderedDict(
    {
        "image_transforms": [
            "shift",
            # "scale",
            # "flip",
            # "rotate",
            # "shift,scale,rotate,flip",
        ],  # image_transforms
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": [i for i in range(num_seeds)],
    }
)

var_configs = OrderedDict({"env": var_env_configs})

env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": "beam_rider",
            "obs_type": "image",
            "frameskip": 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
        "image_width": 40,
        "image_padding": 30,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0,  # seed
    },
}

algorithm = "A3C"
agent_config = {  # Taken from Ray tuned_examples
    "clip_rewards": True,
    "lr": 1e-4,
    # Value function loss coefficient
    "vf_loss_coeff": 2.5,
    # Entropy coefficient
    "entropy_coeff": 0.01,
    "min_iter_time_s": 0,
    "num_envs_per_worker": 5,
    "num_gpus": 0,
    "num_workers": 3,
    "rollout_fragment_length": 10,
    "timesteps_per_iteration": 10000,
    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}


filters_100x100 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [
        128,
        [13, 13],
        1,
    ],
]


model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}


eval_config = {
    "evaluation_interval": None,  # presumably runs evaluation every this many training iterations; None disables it
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            # hack: used inside the Ray on_episode_end() callback to tell evaluation
            # mode apart from training mode so that eval stats can be written
            "dummy_eval": True,
            "transition_noise": 0
            if "state_space_type" in env_config["env_config"]
            and env_config["env_config"]["state_space_type"] == "discrete"
            else tune.function(lambda a: a.normal(0, 0)),
            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
            "action_loss_weight": 0.0,
        },
    },
}
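The preprocessing settings imply a roughly 100x100 observation: `image_width` 40 plus `image_padding` 30 on each side gives 40 + 2 * 30 = 100, which matches the `filters_100x100` naming. A quick check of that arithmetic (reading `image_padding` as per-side padding is an assumption based on the 102x102 comment on the first filter):

image_width = 40
image_padding = 30
padded_size = image_width + 2 * image_padding  # 40 + 2 * 30
print(padded_size)  # 100 -- consistent with the filters_100x100 naming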
Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
import itertools
from collections import OrderedDict

from ray import tune

num_seeds = 5
timesteps_total = 10_000_000

var_env_configs = OrderedDict(
    {
        "image_transforms": [
            "shift",
            # "scale",
            # "flip",
            # "rotate",
            # "shift,scale,rotate,flip",
        ],  # image_transforms
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": [i for i in range(num_seeds)],
    }
)

var_configs = OrderedDict({"env": var_env_configs})

env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": "breakout",
            "obs_type": "image",
            "frameskip": 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
        "image_width": 40,
        "image_padding": 30,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0,  # seed
    },
}

algorithm = "A3C"
agent_config = {  # Taken from Ray tuned_examples
    "clip_rewards": True,
    "lr": 1e-4,
    # Value function loss coefficient
    "vf_loss_coeff": 2.5,
    # Entropy coefficient
    "entropy_coeff": 0.01,
    "min_iter_time_s": 0,
    "num_envs_per_worker": 5,
    "num_gpus": 0,
    "num_workers": 3,
    "rollout_fragment_length": 10,
    "timesteps_per_iteration": 10000,
    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}


filters_100x100 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [
        128,
        [13, 13],
        1,
    ],
]


model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}


eval_config = {
    "evaluation_interval": None,  # presumably runs evaluation every this many training iterations; None disables it
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            # hack: used inside the Ray on_episode_end() callback to tell evaluation
            # mode apart from training mode so that eval stats can be written
            "dummy_eval": True,
            "transition_noise": 0
            if "state_space_type" in env_config["env_config"]
            and env_config["env_config"]["state_space_type"] == "discrete"
            else tune.function(lambda a: a.normal(0, 0)),
            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
            "action_loss_weight": 0.0,
        },
    },
}
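The evaluation override disables stochasticity: `explore` is False, the exploration settings are zeroed, and the noise entries are effectively zero as well. The `tune.function(lambda a: a.normal(0, 0))` callables presumably receive a NumPy random generator from the environment and always return 0, since a normal draw with zero standard deviation is its mean. A tiny illustration under that assumption:

import numpy as np

# The same callable as in the config, minus the tune.function wrapper.
reward_noise = lambda a: a.normal(0, 0)

rng = np.random.RandomState(0)  # assuming the env passes in an RNG like this
print(reward_noise(rng))        # 0.0 -- zero-mean, zero-std draw, i.e. no noise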
Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
import itertools
from collections import OrderedDict

from ray import tune

num_seeds = 5
timesteps_total = 10_000_000

var_env_configs = OrderedDict(
    {
        "image_transforms": [
            "shift",
            # "scale",
            # "flip",
            # "rotate",
            # "shift,scale,rotate,flip",
        ],  # image_transforms
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": [i for i in range(num_seeds)],
    }
)

var_configs = OrderedDict({"env": var_env_configs})

env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": "qbert",
            "obs_type": "image",
            "frameskip": 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
        "image_width": 40,
        "image_padding": 30,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0,  # seed
    },
}

algorithm = "A3C"
agent_config = {  # Taken from Ray tuned_examples
    "clip_rewards": True,
    "lr": 1e-4,
    # Value function loss coefficient
    "vf_loss_coeff": 2.5,
    # Entropy coefficient
    "entropy_coeff": 0.01,
    "min_iter_time_s": 0,
    "num_envs_per_worker": 5,
    "num_gpus": 0,
    "num_workers": 3,
    "rollout_fragment_length": 10,
    "timesteps_per_iteration": 10000,
    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}


filters_100x100 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [
        128,
        [13, 13],
        1,
    ],
]


model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}


eval_config = {
    "evaluation_interval": None,  # presumably runs evaluation every this many training iterations; None disables it
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            # hack: used inside the Ray on_episode_end() callback to tell evaluation
            # mode apart from training mode so that eval stats can be written
            "dummy_eval": True,
            "transition_noise": 0
            if "state_space_type" in env_config["env_config"]
            and env_config["env_config"]["state_space_type"] == "discrete"
            else tune.function(lambda a: a.normal(0, 0)),
            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
            "action_loss_weight": 0.0,
        },
    },
}
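The three files shown here are identical apart from the AtariEnv game (beam_rider, breakout, qbert). Because every one of them sets "state_space_type" to "discrete", the conditional expression for "transition_noise" resolves to the integer 0 rather than the lambda; only "reward_noise" stays a callable. A minimal check of how that conditional evaluates for these configs:

# Stand-in for the env_config defined in each of these files.
env_config = {"env_config": {"state_space_type": "discrete"}}

transition_noise = (
    0
    if "state_space_type" in env_config["env_config"]
    and env_config["env_config"]["state_space_type"] == "discrete"
    else (lambda a: a.normal(0, 0))
)
print(transition_noise)  # 0 -- the discrete branch is taken for all three games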
