
Commit a7afbd5

Make DQN image representations expt config cleaner
1 parent bfe2ea3 commit a7afbd5

File tree: 3 files changed (+75, -129 lines)

experiments/dqn_image_representations.py

+36 -66
@@ -1,39 +1,44 @@
 """
+This file specifies different configurations to be run for an MDP Playground
+experiment. The configurations are divided into varying and static configs.
+The varying configs are the ones that will vary across this experiment. The
+static configs remain fixed throughout an experiment. Additionally, evaluation
+configurations are run interleaved with the experiment to evaluate the agent's
+learning progress.
+
+Varying configs can be specified for the environment, agent and the NN model used.
+This is done as follows:
+
+Specify var_configs as a dict of dicts with fixed keys:
+    "env" for holding configs to vary in the environment
+    "agent" for holding configs to vary for the agent
+    "model" for holding configs to vary for the NN used
+
+Static configs are specified using:
+    env_config specifies static environment configurations
+    agent_config specifies static agent configurations
+    model_config specifies static NN model configurations
+    eval_config specifies static evaluation configurations
 """
 from ray import tune
 from collections import OrderedDict
 import itertools
 num_seeds = 10
 timesteps_total = 20_000
 
-
-transforms = ["shift", "scale", "flip", "rotate"]
-image_transforms = []
-for i in range(len(transforms) + 1):
-    curr_combos = list(itertools.combinations(transforms, i))
-    for j in range(len(curr_combos)):
-        if i == 0:
-            # this is written to a CSV file with ' ' separater, therefore it needs to have some value in there.
-            curr_elem = "none"
-        else:
-            curr_elem = ""
-        for k in range(i):
-            curr_elem += curr_combos[j][k] + ","
-        # print(curr_elem, i, j)
-        image_transforms.append(curr_elem)
-
-
+# var_env_configs specifies variable configs in the environment and we use it as
+# the value for the key "env" in var_configs:
 var_env_configs = OrderedDict(
     {
-        "state_space_size": [8],  # , 10, 12, 14] # [2**i for i in range(1,6)]
-        "action_space_size": [8],  # 2, 4, 8, 16] # [2**i for i in range(1,6)]
-        "delay": [0],  # + [2**i for i in range(4)],
-        "sequence_length": [1],  # , 2, 3, 4],#i for i in range(1,4)]
-        "reward_density": [0.25],  # np.linspace(0.0, 1.0, num=5)
+        "state_space_size": [8],
+        "action_space_size": [8],
+        "delay": [0],
+        "sequence_length": [1],
+        "reward_density": [0.25],
         "make_denser": [False],
-        "terminal_state_density": [0.25],  # np.linspace(0.1, 1.0, num=5)
-        "transition_noise": [0],  # , 0.01, 0.02, 0.10, 0.25]
-        "reward_noise": [0],  # , 1, 5, 10, 25] # Std dev. of normal dist.
+        "terminal_state_density": [0.25],
+        "transition_noise": [0],
+        "reward_noise": [0],
         "image_representations": [True],
         "image_transforms": [
             "none",
@@ -42,7 +47,7 @@
             "flip",
             "rotate",
             "shift,scale,rotate,flip",
-        ],  # image_transforms,
+        ],
         "image_scale_range": [(0.5, 2)],
         "image_width": [100],
         "image_height": [100],
@@ -52,6 +57,8 @@
 
 var_configs = OrderedDict({"env": var_env_configs})
 
+# All the configs from here on are static configs, i.e., those that won't be
+# varied in any runs in this experiment:
 env_config = {
     "env": "RLToy-v0",
     "horizon": 100,
@@ -78,7 +85,7 @@
     "final_prioritized_replay_beta": 1.0,
     "hiddens": None,
     "learning_starts": 1000,
-    "lr": 1e-5,  # "lr": grid_search([1e-2, 1e-4, 1e-6]),
+    "lr": 1e-5,
     "n_step": 1,
     "noisy": False,
     "num_atoms": 1,
@@ -94,20 +101,6 @@
 
 
 # formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2
-filters_84x84 = [
-    [
-        16,
-        [8, 8],
-        4,
-    ],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
-    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
-    [
-        256,
-        [11, 11],
-        1,
-    ],  # changes to 1x1x256 with padding 0 (or 3x3x256 for 100x100x1); this is the only layer with valid padding in Ray!
-]
-
 filters_100x100 = [
     [
         16,
@@ -121,36 +114,13 @@
         1,
     ],  # changes to 1x1x64 with padding 0 (or 3x3x64 for 100x100x1); this is the only layer with valid padding in Ray!
 ]
-# [num_outputs(=8 in this case), [1, 1], 1] conv2d appended by Ray always followed by a Dense layer with 1 output
-
-# filters_99x99 = [
-#     [16, [8, 8], 4],  # 51x51x16
-#     [32, [4, 4], 2],
-#     [64, [13, 13], 1],
-# ]
-
-filters_50x50 = [
-    [16, [4, 4], 2],
-    [32, [4, 4], 2],
-    [64, [13, 13], 1],
-]
-
-filters_400x400 = [
-    [16, [32, 32], 16],
-    [32, [4, 4], 2],
-    [64, [13, 13], 1],
-]
 
 model_config = {
     "model": {
         "fcnet_hiddens": [256, 256],
-        # "custom_preprocessor": "ohe",
-        "custom_options": {},  # extra options to pass to your preprocessor
+        "custom_options": {},
         "conv_activation": "relu",
         "conv_filters": filters_100x100,
-        # "no_final_linear": False,
-        # "vf_share_layers": True,
-        # "fcnet_activation": "tanh",
         "use_lstm": False,
         "max_seq_len": 20,
         "lstm_cell_size": 256,
@@ -160,7 +130,7 @@
 
 
 eval_config = {
-    "evaluation_interval": 1,  # I think this means every x training_iterations
+    "evaluation_interval": 1,  # this means every x training_iterations
     "evaluation_config": {
         "explore": False,
         "exploration_fraction": 0,
(second changed file; filename not shown)
@@ -1,33 +1,57 @@
-"""###IMP dummy_seed should always be last in the order in the OrderedDict below!!!
+"""
+This file specifies different configurations to be run for an MDP Playground
+experiment. The configurations are divided into varying and static configs.
+The varying configs are the ones that will vary across this experiment. The
+static configs remain fixed throughout an experiment. Additionally, evaluation
+configurations are run interleaved with the experiment to evaluate the agent's
+learning progress.
+
+Varying configs can be specified for the environment, agent and the NN model used.
+This is done as follows:
+
+Specify var_configs as a dict of dicts with fixed keys:
+    "env" for holding configs to vary in the environment
+    "agent" for holding configs to vary for the agent
+    "model" for holding configs to vary for the NN used
+
+Static configs are specified using:
+    env_config specifies static environment configurations
+    agent_config specifies static agent configurations
+    model_config specifies static NN model configurations
+    eval_config specifies static evaluation configurations
 """
 import itertools
 from ray import tune
 from collections import OrderedDict
 num_seeds = 10
+timesteps_total = 20_000
 
-
+# var_env_configs specifies variable configs in the environment and we use it as
+# the value for the key "env" in var_configs:
 var_env_configs = OrderedDict(
     {
-        "state_space_size": [8],  # , 10, 12, 14] # [2**i for i in range(1,6)]
-        "action_space_size": [8],  # 2, 4, 8, 16] # [2**i for i in range(1,6)]
-        "delay": [0],  # + [2**i for i in range(4)],
-        "sequence_length": [1],  # , 2, 3, 4],#i for i in range(1,4)]
-        "reward_density": [0.25],  # np.linspace(0.0, 1.0, num=5)
-        "make_denser": [False],
-        "terminal_state_density": [0.25],  # np.linspace(0.1, 1.0, num=5)
-        "transition_noise": [0],  # , 0.01, 0.02, 0.10, 0.25]
-        "reward_noise": [0],  # , 1, 5, 10, 25] # Std dev. of normal dist.
+        "state_space_size": [8],
+        "action_space_size": [8],
         "image_representations": [True],
-        "image_transforms": ["shift"],  # , 'scale', 'flip', 'rotate'],
+        "image_transforms": ["shift"],
         "image_sh_quant": [2, 4, 8, 16],
         "image_width": [100],
         "image_height": [100],
+        "delay": [0],
+        "sequence_length": [1],
+        "reward_density": [0.25],
+        "make_denser": [False],
+        "terminal_state_density": [0.25],
+        "transition_noise": [0],
+        "reward_noise": [0],
         "dummy_seed": [i for i in range(num_seeds)],
     }
 )
 
 var_configs = OrderedDict({"env": var_env_configs})
 
+# All the configs from here on are static configs, i.e., those that won't be
+# varied in any runs in this experiment:
 env_config = {
     "env": "RLToy-v0",
     "horizon": 100,
@@ -54,7 +78,7 @@
     "final_prioritized_replay_beta": 1.0,
     "hiddens": None,
     "learning_starts": 1000,
-    "lr": 1e-5,  # "lr": grid_search([1e-2, 1e-4, 1e-6]),
+    "lr": 1e-5,
     "n_step": 1,
     "noisy": False,
     "num_atoms": 1,
@@ -69,21 +93,6 @@
 }
 
 
-# formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2
-filters_84x84 = [
-    [
-        16,
-        [8, 8],
-        4,
-    ],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
-    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
-    [
-        256,
-        [11, 11],
-        1,
-    ],  # changes to 1x1x256 with padding 0 (or 3x3x256 for 100x100x1); this is the only layer with valid padding in Ray!
-]
-
 filters_100x100 = [
     [
         16,
@@ -97,36 +106,13 @@
         1,
     ],  # changes to 1x1x64 with padding 0 (or 3x3x64 for 100x100x1); this is the only layer with valid padding in Ray!
 ]
-# [num_outputs(=8 in this case), [1, 1], 1] conv2d appended by Ray always followed by a Dense layer with 1 output
-
-# filters_99x99 = [
-#     [16, [8, 8], 4],  # 51x51x16
-#     [32, [4, 4], 2],
-#     [64, [13, 13], 1],
-# ]
-
-filters_50x50 = [
-    [16, [4, 4], 2],
-    [32, [4, 4], 2],
-    [64, [13, 13], 1],
-]
-
-filters_400x400 = [
-    [16, [32, 32], 16],
-    [32, [4, 4], 2],
-    [64, [13, 13], 1],
-]
 
 model_config = {
     "model": {
         "fcnet_hiddens": [256, 256],
-        # "custom_preprocessor": "ohe",
-        "custom_options": {},  # extra options to pass to your preprocessor
+        "custom_options": {},
         "conv_activation": "relu",
         "conv_filters": filters_100x100,
-        # "no_final_linear": False,
-        # "vf_share_layers": True,
-        # "fcnet_activation": "tanh",
         "use_lstm": False,
         "max_seq_len": 20,
         "lstm_cell_size": 256,
@@ -154,14 +140,3 @@
         },
     },
 }
-value_tuples = []
-for config_type, config_dict in var_configs.items():
-    for key in config_dict:
-        assert (
-            isinstance(var_configs[config_type][key], list)
-        ), "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
-        value_tuples.append(var_configs[config_type][key])
-
-
-cartesian_product_configs = list(itertools.product(*value_tuples))
-print("Total number of configs. to run:", len(cartesian_product_configs))

mdp_playground/envs/gym_env_wrapper.py

+1
@@ -184,6 +184,7 @@ def __init__(self, env, **config):
         # This is a bit hacky because we need to define the state_space_dim
         # of the irrelevant toy env in the "base" config and not the nested irrelevant_features
         # dict inside the base config to be compatible with the config_processor of MDPP
+        # which requires variable config to be in the "base" config.
         irr_toy_env_conf["state_space_dim"] = \
             config["irr_state_space_dim"]  # #hack
 
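Schematically, the hack the extended comment documents looks like the following sketch
(the dict shapes are assumed for illustration; only the two config keys appear in the diff):

# The config_processor of MDPP requires variable config to be in the "base" config,
# so the irrelevant toy env's state_space_dim arrives as "irr_state_space_dim" at the
# top level and is copied into the nested irrelevant-features config by the wrapper.
config = {
    "irr_state_space_dim": 8,      # variable config, kept in the base config
    "irrelevant_features": {},     # nested config dict for the irrelevant toy env
}
irr_toy_env_conf = config["irrelevant_features"]
irr_toy_env_conf["state_space_dim"] = config["irr_state_space_dim"]  # #hack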
0 commit comments