
Commit 6598163

# 324, 325: DQN, Rainbow on Atari image_transforms sh_quant
1 parent 94f417c commit 6598163

4 files changed: +332 -4 lines changed
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
import itertools
from ray import tune
from collections import OrderedDict
num_seeds = 5
timesteps_total = 10_000_000

var_env_configs = OrderedDict(
    {
        "image_transforms": [
            "shift",
            # "scale",
            # "flip",
            # "rotate",
            # "shift,scale,rotate,flip",
        ],  # image_transforms,
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": [i for i in range(num_seeds)],
    }
)

var_configs = OrderedDict({"env": var_env_configs})

env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": "space_invaders",
            "obs_type": "image",
            "frameskip": 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
        "image_width": 40,
        "image_padding": 30,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0, #seed
    },
}

algorithm = "DQN"
agent_config = {  # Taken from Ray tuned_examples
    "adam_epsilon": 0.00015,
    "buffer_size": 150000,
    "double_q": False,
    "dueling": False,
    "exploration_config": {"epsilon_timesteps": 200000, "final_epsilon": 0.01},
    "final_prioritized_replay_beta": 1.0,
    "hiddens": [512],
    "learning_starts": 20000,
    "lr": 6.25e-05,
    "n_step": 1,
    "noisy": False,
    "num_atoms": 1,
    "num_gpus": 0,
    "num_workers": 3,
    "prioritized_replay": False,
    "prioritized_replay_alpha": 0.5,
    "prioritized_replay_beta_annealing_timesteps": 2000000,
    "rollout_fragment_length": 4,
    "target_network_update_freq": 8000,
    "timesteps_per_iteration": 10000,
    "train_batch_size": 32,
    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {"CPU": 2},
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}

# formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2
filters_124x124 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 32x32x16 for 124x124x1)
    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 16x16x32 for 124x124x1)
    [
        128,
        [16, 16],
        1,
    ],  # changes to 1x1x128 with padding 0 (for 124x124x1??); this is the only layer with "valid" padding in Ray!
]

filters_62x62 = [
    [
        16,
        [4, 4],
        2,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 32x32x16 for 62x62x1)
    [32, [4, 4], 2],
    [
        128,
        [16, 16],
        1,
    ],
]

filters_100x100 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [
        128,
        [13, 13],
        1,
    ],
]



model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_hiddens": [256, 256],
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}


eval_config = {
    "evaluation_interval": None,  # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        "horizon": 100,
        "env_config": {
            "dummy_eval": True,  # hack: used inside the Ray on_episode_end() callback to tell evaluation mode apart from training mode, so that eval stats can be written
            "transition_noise": 0
            if "state_space_type" in env_config["env_config"]
            and env_config["env_config"]["state_space_type"] == "discrete"
            else tune.function(lambda a: a.normal(0, 0)),
            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
            "action_loss_weight": 0.0,
        },
    },
}
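
A quick sanity check on the filters_100x100 arithmetic above (not part of the committed file): the padded observation should be image_width + 2 * image_padding = 40 + 2 * 30 = 100 pixels per side. The sketch below assumes TensorFlow-style "same" padding (output width = ceil(W / S)) on every conv layer except the last, and "valid" padding on the last one, as the in-file comment about Ray's vision net suggests; the out_size helper and the loop are illustrative, not code from this repository.

import math

# layer specs copied from filters_100x100 above: [num_filters, [kernel, kernel], stride]
layers = [[16, [8, 8], 4], [32, [4, 4], 2], [128, [13, 13], 1]]

def out_size(w, k, s, padding):
    # "same": ceil(W / S); "valid": floor((W - K) / S) + 1
    return math.ceil(w / s) if padding == "same" else (w - k) // s + 1

w = 40 + 2 * 30  # image_width + 2 * image_padding = 100
for i, (n_filters, (k, _), stride) in enumerate(layers):
    padding = "same" if i < len(layers) - 1 else "valid"
    w = out_size(w, k, stride, padding)
    print(f"layer {i}: {w}x{w}x{n_filters}")
# Expected under these assumptions: 25x25x16 -> 13x13x32 -> 1x1x128,
# i.e. the final [13, 13] "valid" kernel collapses the 13x13 map to a 1x1 feature vector.
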
Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
import itertools
from ray import tune
from collections import OrderedDict
num_seeds = 5
timesteps_total = 10_000_000

var_env_configs = OrderedDict(
    {
        "image_transforms": [
            "shift",
            # "scale",
            # "flip",
            # "rotate",
            # "shift,scale,rotate,flip",
        ],  # image_transforms,
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": [i for i in range(num_seeds)],
    }
)

var_configs = OrderedDict({"env": var_env_configs})

env_config = {
    "env": "GymEnvWrapper-Atari",
    "env_config": {
        "AtariEnv": {
            "game": "space_invaders",
            "obs_type": "image",
            "frameskip": 1,
        },
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
        "image_width": 40,
        "image_padding": 30,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0, #seed
    },
}

algorithm = "DQN"
agent_config = {  # Taken from Ray tuned_examples
    "adam_epsilon": 0.00015,
    "buffer_size": 150000,
    "double_q": True,
    "dueling": True,
    "exploration_config": {"epsilon_timesteps": 200000, "final_epsilon": 0.01},
    "final_prioritized_replay_beta": 1.0,
    "hiddens": [512],
    "learning_starts": 20000,
    "lr": 6.25e-05,
    # 'lr': 0.0001,
    # 'model': { 'dim': 42,
    #            'grayscale': True,
    #            'zero_mean': False},
    "n_step": 4,
    "noisy": False,
    "num_atoms": 51,
    "num_gpus": 0,
    "num_workers": 3,
    # "num_cpus_for_driver": 2,
    # 'gpu': False,  # deprecated
    "prioritized_replay": True,
    "prioritized_replay_alpha": 0.5,
    "prioritized_replay_beta_annealing_timesteps": 2000000,
    "rollout_fragment_length": 4,
    "timesteps_per_iteration": 10000,
    "target_network_update_freq": 8000,
    # 'target_network_update_freq': 500,
    "train_batch_size": 32,
    "tf_session_args": {
        # note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}

# formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2
filters_124x124 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 32x32x16 for 124x124x1)
    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 16x16x32 for 124x124x1)
    [
        256,
        [16, 16],
        1,
    ],  # changes to 1x1x256 with padding 0 (for 124x124x1??); this is the only layer with "valid" padding in Ray!
]

filters_100x100 = [
    [
        16,
        [8, 8],
        4,
    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [
        128,
        [13, 13],
        1,
    ],
]



model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_hiddens": [256, 256],
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}


eval_config = {
    "evaluation_interval": None,  # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            "dummy_eval": True,  # hack: used inside the Ray on_episode_end() callback to tell evaluation mode apart from training mode, so that eval stats can be written
            "transition_noise": 0
            if "state_space_type" in env_config["env_config"]
            and env_config["env_config"]["state_space_type"] == "discrete"
            else tune.function(lambda a: a.normal(0, 0)),
            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
            "action_loss_weight": 0.0,
        },
    },
}
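
One reading of both new config files (not part of the commit itself): var_env_configs varies a single image transform ("shift") over 4 image_sh_quant values and 5 dummy seeds, i.e. 20 variants, which lines up with the 0-19 array indices in the MOAB job scripts updated below. A minimal sketch of that expansion, assuming the experiment runner simply takes the Cartesian product of the value lists (presumably the reason for the otherwise unused `import itertools` at the top of the configs); the dict literal here just restates the grid from the configs:

import itertools
from collections import OrderedDict

var_env_configs = OrderedDict(
    {
        "image_transforms": ["shift"],
        "image_sh_quant": [2, 4, 8, 16],
        "dummy_seed": list(range(5)),
    }
)

# Cartesian product over the value lists -> one dict per experiment variant
variants = [
    dict(zip(var_env_configs.keys(), combo))
    for combo in itertools.product(*var_env_configs.values())
]
print(len(variants))  # 20, matching the 0-19 array job indices
print(variants[0])    # {'image_transforms': 'shift', 'image_sh_quant': 2, 'dummy_seed': 0}
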

run_experiments_on_cluster_nemo.sh

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 #MOAB -N mdpp
 #MOAB -t 0-19 # specifies array job indices
 #MOAB -l nodes=1:ppn=5
-#MOAB -l walltime=0:50:00:00
+#MOAB -l walltime=0:80:00:00
 #MOAB -l pmem=8GB # Seems like it is memory per CPU core
 #MOAB -d /work/ws/nemo/fr_rr1034-ws_mdpp-0 # initial working dir.

@@ -23,7 +23,7 @@ echo "TMPDIR: " $TMPDIR

 printenv

-export EXP_NAME='a3c_beam_rider_image_transforms_42_sh_quant' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?
+export EXP_NAME='dqn_space_invaders_image_transforms_42_sh_quant' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?

 echo -e '\033[32m'
 # Print some information about the job to STDOUT

run_experiments_on_cluster_nemo_2.sh

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 #MOAB -N mdpp
 #MOAB -t 0-19 # specifies array job indices
 #MOAB -l nodes=1:ppn=5
-#MOAB -l walltime=0:50:00:00
+#MOAB -l walltime=0:90:00:00
 #MOAB -l pmem=8GB # Seems like it is memory per CPU core
 #MOAB -d /work/ws/nemo/fr_rr1034-ws_mdpp-0 # initial working dir.

@@ -23,7 +23,7 @@ echo "TMPDIR: " $TMPDIR

 printenv

-export EXP_NAME='a3c_breakout_image_transforms_42_sh_quant' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?
+export EXP_NAME='rainbow_space_invaders_image_transforms_42_sh_quant' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?

 echo -e '\033[32m'
 # Print some information about the job to STDOUT
