|
1 |
| -"""###IMP dummy_seed should always be last in the order in the OrderedDict below!!! |
| 1 | +""" |
| 2 | +This file specifies different configurations to be run for an MDP Playground |
| 3 | +experiment. The configurations are divided into varying and static configs. |
| 4 | +The varying configs are the ones that will vary across this experiment. The |
| 5 | +static configs remain fixed throughout an experiment. Additionally, evaluation |
| 6 | +configurations are run interleaved with the experiment to evaluate the agent's |
| 7 | +learning progress. |
| 8 | +
|
| 9 | +Varying configs can be specified for the environment, agent and the NN model used. |
| 10 | +This is done as follows: |
| 11 | +
|
| 12 | +Specify var_configs as a dict of dicts with fixed keys: |
| 13 | +"env" for holding configs to vary in the environment |
| 14 | +"agent" for holding configs to vary for the agent |
| 15 | +"model" for holding configs to vary for the NN used |
| 16 | +
|
| 17 | +Static configs are specified using: |
| 18 | +env_config specifies static environment configurations |
| 19 | +agent_config specifies static agent configurations |
| 20 | +model_config specifies static NN model configurations |
| 21 | +eval_config specifies static evaluation configurations |
2 | 22 | """
|
3 | 23 | import itertools
|
4 | 24 | from ray import tune
|
5 | 25 | from collections import OrderedDict
|
6 | 26 | num_seeds = 10
|
| 27 | +timesteps_total = 20_000 |
7 | 28 |
|
8 |
| - |
| 29 | +# var_env_configs specifies variable configs in the environment and we use it as |
| 30 | +# the value for the key "env" in var_configs: |
9 | 31 | var_env_configs = OrderedDict(
|
10 | 32 | {
|
11 |
| - "state_space_size": [8], # , 10, 12, 14] # [2**i for i in range(1,6)] |
12 |
| - "action_space_size": [8], # 2, 4, 8, 16] # [2**i for i in range(1,6)] |
13 |
| - "delay": [0], # + [2**i for i in range(4)], |
14 |
| - "sequence_length": [1], # , 2, 3, 4],#i for i in range(1,4)] |
15 |
| - "reward_density": [0.25], # np.linspace(0.0, 1.0, num=5) |
16 |
| - "make_denser": [False], |
17 |
| - "terminal_state_density": [0.25], # np.linspace(0.1, 1.0, num=5) |
18 |
| - "transition_noise": [0], # , 0.01, 0.02, 0.10, 0.25] |
19 |
| - "reward_noise": [0], # , 1, 5, 10, 25] # Std dev. of normal dist. |
| 33 | + "state_space_size": [8], |
| 34 | + "action_space_size": [8], |
20 | 35 | "image_representations": [True],
|
21 |
| - "image_transforms": ["shift"], # , 'scale', 'flip', 'rotate'], |
| 36 | + "image_transforms": ["shift"], |
22 | 37 | "image_sh_quant": [2, 4, 8, 16],
|
23 | 38 | "image_width": [100],
|
24 | 39 | "image_height": [100],
|
| 40 | + "delay": [0], |
| 41 | + "sequence_length": [1], |
| 42 | + "reward_density": [0.25], |
| 43 | + "make_denser": [False], |
| 44 | + "terminal_state_density": [0.25], |
| 45 | + "transition_noise": [0], |
| 46 | + "reward_noise": [0], |
25 | 47 | "dummy_seed": [i for i in range(num_seeds)],
|
26 | 48 | }
|
27 | 49 | )
|
28 | 50 |
|
29 | 51 | var_configs = OrderedDict({"env": var_env_configs})
|
30 | 52 |
|
| 53 | +# All the configs from here on are static configs, i.e., those that won't be |
| 54 | +# varied in any runs in this experiment: |
31 | 55 | env_config = {
|
32 | 56 | "env": "RLToy-v0",
|
33 | 57 | "horizon": 100,
|
|
54 | 78 | "final_prioritized_replay_beta": 1.0,
|
55 | 79 | "hiddens": None,
|
56 | 80 | "learning_starts": 1000,
|
57 |
| - "lr": 1e-5, # "lr": grid_search([1e-2, 1e-4, 1e-6]), |
| 81 | + "lr": 1e-5, |
58 | 82 | "n_step": 1,
|
59 | 83 | "noisy": False,
|
60 | 84 | "num_atoms": 1,
|
|
69 | 93 | }
|
70 | 94 |
|
71 | 95 |
|
72 |
| -# formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2 |
73 |
| -filters_84x84 = [ |
74 |
| - [ |
75 |
| - 16, |
76 |
| - [8, 8], |
77 |
| - 4, |
78 |
| - ], # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1) |
79 |
| - [32, [4, 4], 2], # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1) |
80 |
| - [ |
81 |
| - 256, |
82 |
| - [11, 11], |
83 |
| - 1, |
84 |
| - ], # changes to 1x1x256 with padding 0 (or 3x3x256 for 100x100x1); this is the only layer with valid padding in Ray! |
85 |
| -] |
86 |
| - |
87 | 96 | filters_100x100 = [
|
88 | 97 | [
|
89 | 98 | 16,
|
|
97 | 106 | 1,
|
98 | 107 | ], # changes to 1x1x64 with padding 0 (or 3x3x64 for 100x100x1); this is the only layer with valid padding in Ray!
|
99 | 108 | ]
|
100 |
| -# [num_outputs(=8 in this case), [1, 1], 1] conv2d appended by Ray always followed by a Dense layer with 1 output |
101 |
| - |
102 |
| -# filters_99x99 = [ |
103 |
| -# [16, [8, 8], 4], # 51x51x16 |
104 |
| -# [32, [4, 4], 2], |
105 |
| -# [64, [13, 13], 1], |
106 |
| -# ] |
107 |
| - |
108 |
| -filters_50x50 = [ |
109 |
| - [16, [4, 4], 2], |
110 |
| - [32, [4, 4], 2], |
111 |
| - [64, [13, 13], 1], |
112 |
| -] |
113 |
| - |
114 |
| -filters_400x400 = [ |
115 |
| - [16, [32, 32], 16], |
116 |
| - [32, [4, 4], 2], |
117 |
| - [64, [13, 13], 1], |
118 |
| -] |
119 | 109 |
|
120 | 110 | model_config = {
|
121 | 111 | "model": {
|
122 | 112 | "fcnet_hiddens": [256, 256],
|
123 |
| - # "custom_preprocessor": "ohe", |
124 |
| - "custom_options": {}, # extra options to pass to your preprocessor |
| 113 | + "custom_options": {}, |
125 | 114 | "conv_activation": "relu",
|
126 | 115 | "conv_filters": filters_100x100,
|
127 |
| - # "no_final_linear": False, |
128 |
| - # "vf_share_layers": True, |
129 |
| - # "fcnet_activation": "tanh", |
130 | 116 | "use_lstm": False,
|
131 | 117 | "max_seq_len": 20,
|
132 | 118 | "lstm_cell_size": 256,
|
|
154 | 140 | },
|
155 | 141 | },
|
156 | 142 | }
|
157 |
| -value_tuples = [] |
158 |
| -for config_type, config_dict in var_configs.items(): |
159 |
| - for key in config_dict: |
160 |
| - assert ( |
161 |
| - isinstance(var_configs[config_type][key], list) |
162 |
| - ), "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values" |
163 |
| - value_tuples.append(var_configs[config_type][key]) |
164 |
| - |
165 |
| - |
166 |
| -cartesian_product_configs = list(itertools.product(*value_tuples)) |
167 |
| -print("Total number of configs. to run:", len(cartesian_product_configs)) |
0 commit comments