
Commit 07367ba

Cave integration (#16)
* BOHB output and CAVE analysis
* add multiple runs csv to analysis
* mdpp to cave
* cave plot experiments and analysis same as master
* changes to match master
* inspect baselines weights, save best eval model, experiments
* mdpp to cave analysis options, notebook
* cfg_processor
* processor changes
* add changes to baselines file and config processor
* CAVE integration initial stuff from Jessica's branch
* small change
* small change
* Update to_bohb_results()
* Made reading in stats more robust, improved MDPP to cave conversion
* Deleted some experiments
* add tests
* Try to fix failing tests
* small bug
* Update code coverage to exclude example.py
* Disable CAVE tests
* Fix tests

Co-authored-by: Jessica Borja <[email protected]>
Co-authored-by: JessicaBorja <[email protected]>
1 parent fdc0d08 commit 07367ba

12 files changed (+12093 −106 lines)

.coveragerc

+2

@@ -0,0 +1,2 @@
+[run]
+omit = example.py
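
This tells coverage.py to skip example.py during measurement. For context, a minimal sketch of the effect via the coverage API (hypothetical usage, not from the repo's CI setup):

import coverage

# Coverage() picks up .coveragerc (including "[run] omit = example.py")
# automatically from the working directory.
cov = coverage.Coverage()
cov.start()

def measured():  # stands in for the test suite
    return sum(range(10))

measured()
cov.stop()
cov.report()  # files matching the omit pattern never appear here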

.gitignore

+4

@@ -107,3 +107,7 @@ venv.bak/
 # Pickled files from me
 *pickle*dat

+.vscode/
+
+# whitelist
+!tests/files/mdpp_12744267_SAC_target_radius/*.csv

experiments/dqn_vanilla_hps.py

−104

This file was deleted.

@@ -0,0 +1,151 @@
'''###IMP dummy_seed should always be last in the order in the OrderedDict below!!!
'''
num_seeds = 5

from collections import OrderedDict
var_env_configs = OrderedDict({
    'state_space_size': [8],  # , 10, 12, 14] # [2**i for i in range(1,6)]
    'action_space_size': [8],  # 2, 4, 8, 16] # [2**i for i in range(1,6)]
    'delay': [0],  # + [2**i for i in range(4)],
    'sequence_length': [1],  # , 2, 3, 4], # i for i in range(1,4)]
    'reward_density': [0.25],  # np.linspace(0.0, 1.0, num=5)
    'make_denser': [False],
    'terminal_state_density': [0.25],  # np.linspace(0.1, 1.0, num=5)
    'transition_noise': [0],  # , 0.01, 0.02, 0.10, 0.25]
    'reward_noise': [0],  # , 1, 5, 10, 25] # Std dev. of normal dist.
    'image_representations': [True],
    'image_transforms': ['none'],  # image_transforms, # ['shift', 'scale', 'flip', 'rotate', 'shift,scale,rotate,flip']
    'image_width': [100],
    'image_height': [100],
    'dummy_seed': [i for i in range(num_seeds)],
})

var_agent_configs = OrderedDict({
    "learning_starts": [500, 1000, 2000],
    "lr": [1e-3, 1e-4, 1e-5],  # "lr": grid_search([1e-2, 1e-4, 1e-6]),
    "n_step": [1, 2, 3, 4],
    "noisy": [True, False],
    "target_network_update_freq": [8, 80, 800],
})


# Conv output size formula: [(W - K + 2P)/S] + 1; for padding=same: P = ((S-1)*W - S + K)/2
filters_84x84 = [
    [16, [8, 8], 4],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
    [256, [11, 11], 1],  # changes to 1x1x256 with padding 0 (or 3x3x256 for 100x100x1); this is the only layer with valid padding in Ray!
]
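
# Worked check of the output-size formula above for the first layer
# (illustrative, not part of the original file): W=84, K=8, S=4, P=4 gives
# [(84 - 8 + 2*4)/4] + 1 = 84/4 + 1 = 22, i.e. 84x84 -> 22x22; with W=100,
# [(100 - 8 + 2*4)/4] + 1 = 100/4 + 1 = 26, i.e. 100x100 -> 26x26.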
filters_100x100 = [
    [16, [8, 8], 4],  # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
    [32, [4, 4], 2],  # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
    [64, [13, 13], 1],  # changes to 1x1x64 with padding 0 (or 3x3x64 for 100x100x1); this is the only layer with valid padding in Ray!
]
# [num_outputs(=8 in this case), [1, 1], 1] conv2d appended by Ray always followed by a Dense layer with 1 output

# filters_99x99 = [
#     [16, [8, 8], 4],  # 51x51x16
#     [32, [4, 4], 2],
#     [64, [13, 13], 1],
# ]

filters_100x100_large = [
    [16, [8, 8], 4],
    [32, [4, 4], 2],
    [256, [13, 13], 1],
]

filters_50x50 = [
    [16, [4, 4], 2],
    [32, [4, 4], 2],
    [64, [13, 13], 1],
]

filters_400x400 = [
    [16, [32, 32], 16],
    [32, [4, 4], 2],
    [64, [13, 13], 1],
]


var_model_configs = OrderedDict({
    "conv_filters": [filters_100x100, filters_100x100_large],
})

var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs,
    "model": var_model_configs,
})
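
# Illustrative sketch (an assumption, not part of the original file): each
# OrderedDict above maps a config name to a list of values to grid over, so
# the number of experiment settings is the size of their cartesian product.
import itertools
_value_lists = [values
                for conf in var_configs.values()
                for values in conf.values()]
_num_settings = len(list(itertools.product(*_value_lists)))
# 5 seeds * (3*3*4*2*3) agent combos * 2 conv_filters = 2160 settings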
env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0,  # seed
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}

algorithm = "DQN"
agent_config = {
    "adam_epsilon": 1e-4,
    "buffer_size": 1000000,
    "double_q": True,
    "dueling": True,
    "exploration_final_eps": 0.01,
    "exploration_fraction": 0.1,
    "schedule_max_timesteps": 20000,
    # "hiddens": None,
    "num_atoms": 10,  # [5, 10, 20]
    "prioritized_replay": True,
    "prioritized_replay_alpha": 0.75,
    "prioritized_replay_beta": 0.4,
    "final_prioritized_replay_beta": 1.0,
    "beta_annealing_fraction": 1.0,

    "sample_batch_size": 4,
    "timesteps_per_iteration": 1000,
    "train_batch_size": 32,
    "min_iter_time_s": 0,
}


model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        # "no_final_linear": False,
        # "vf_share_layers": True,
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}

from ray import tune
eval_config = {
    "evaluation_interval": 1,  # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        "horizon": 100,
        "env_config": {
            "dummy_eval": True,  # hack: used inside the Ray callback on_episode_end() to check whether we are in evaluation or training mode, so that eval stats can be written
            'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
            'reward_noise': tune.function(lambda a: a.normal(0, 0)),
            'action_loss_weight': 0.0,
        }
    },
}
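
# Illustrative sketch (an assumption, not the repo's actual run script):
# the fragments above are presumably merged into a single Ray Trainer
# config by the experiment runner, roughly like this:
full_config = {}
for fragment in (env_config, agent_config, model_config, eval_config):
    full_config.update(fragment)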

mdp_playground/analysis/analysis.py

+1 −1

@@ -236,7 +236,7 @@ def join_files(file_prefix, file_suffix):
         for i in range(stats_pd.shape[0] - 1):
             if (
                 stats_pd["timesteps_total"].iloc[i]
-                > stats_pd["timesteps_total"].iloc[i + 1]
+                >= stats_pd["timesteps_total"].iloc[i + 1]
             ):
                 final_rows_for_a_config.append(i)
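
The relaxation from `>` to `>=` matters when CSVs from consecutive runs are joined and the timestep counter does not strictly decrease at a run boundary, e.g. when the next run starts at the same `timesteps_total` the previous one ended on. A minimal sketch with hypothetical data (not from the repo's test files):

import pandas as pd

# Two concatenated runs; the second starts at the same timestep count
# (2000) that the first ended on.
stats_pd = pd.DataFrame({"timesteps_total": [1000, 2000, 2000, 3000]})

final_rows_for_a_config = []
for i in range(stats_pd.shape[0] - 1):
    if (
        stats_pd["timesteps_total"].iloc[i]
        >= stats_pd["timesteps_total"].iloc[i + 1]
    ):
        final_rows_for_a_config.append(i)

print(final_rows_for_a_config)  # [1] with >=; [] with the old strict >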
