Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cave integration #16

Merged
merged 24 commits into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
905bf22
BOHB output and CAVE analysis
JessicaBorja Feb 17, 2021
f43d430
add multiple runs csv to analysis
JessicaBorja Feb 18, 2021
e035294
mdpp to cave
JessicaBorja Mar 10, 2021
5db72df
Merge branch 'master' into single_launcher
JessicaBorja Mar 10, 2021
100cd63
cave plot experiments and analysis same as master
JessicaBorja Mar 10, 2021
57c60ad
changes to match master
JessicaBorja Mar 24, 2021
f4a4df3
inspect baselines weights, save best eval model, experiments
JessicaBorja May 8, 2021
65285cc
mdpp to cave analysis options, notebook
JessicaBorja May 10, 2021
99202be
cfg_processor
JessicaBorja May 13, 2021
413448f
processor changes
JessicaBorja May 13, 2021
5e76bde
add changes to baselines file and config processor
JessicaBorja May 13, 2021
f86c043
CAVE integration initial stuff from Jessica's branch
RaghuSpaceRajan Oct 1, 2021
984e06e
small change
RaghuSpaceRajan Oct 1, 2021
b84d193
small change
RaghuSpaceRajan Oct 1, 2021
d0dced7
Merge master into CAVE branch
RaghuSpaceRajan Oct 1, 2021
9f016d2
Update to_bohb_results()
RaghuSpaceRajan Oct 5, 2021
5229d63
Made reading in stats more robust, improved MDPP to cave conversion
RaghuSpaceRajan Oct 5, 2021
dfe7f86
Deleted some experiments
RaghuSpaceRajan Oct 6, 2021
7d2c02a
add tests
RaghuSpaceRajan Oct 6, 2021
241542e
Try to fix failing tests
RaghuSpaceRajan Oct 6, 2021
b0294bd
small bug
RaghuSpaceRajan Oct 6, 2021
305e33a
Update code coverage to exclude example.py
RaghuSpaceRajan Oct 6, 2021
aad3070
Disable CAVE tests
RaghuSpaceRajan Oct 6, 2021
1458d22
Fix tests
RaghuSpaceRajan Oct 6, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[run]
omit = example.py
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,7 @@ venv.bak/
# Pickled files from me
*pickle*dat

.vscode/

#whitelist
!tests/files/mdpp_12744267_SAC_target_radius/*.csv
104 changes: 0 additions & 104 deletions experiments/dqn_vanilla_hps.py

This file was deleted.

151 changes: 151 additions & 0 deletions experiments/rainbow_image_representations_tune_hps_cave_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
'''Experiment configuration: tune Rainbow-style DQN hyperparameters on an
image-representation MDP Playground toy environment, producing runs that can
later be converted for CAVE analysis.

This module only *declares* configuration dictionaries (environment grid,
agent hyperparameter grid, model grid, fixed agent/model/eval settings); it
is read by the experiment launcher elsewhere in the project.

###IMP dummy_seed should always be last in the order in the OrderedDict below!!!
'''
# Number of repeated runs per grid point; realised through 'dummy_seed' below.
num_seeds = 5

from collections import OrderedDict
# Environment-side grid. Each key maps to the list of values to sweep over;
# the commented-out values are alternative grid points used in other
# experiments and are kept here for reference.
var_env_configs = OrderedDict({
'state_space_size': [8],#, 10, 12, 14] # [2**i for i in range(1,6)]
'action_space_size': [8],#2, 4, 8, 16] # [2**i for i in range(1,6)]
'delay': [0], # + [2**i for i in range(4)],
'sequence_length': [1], #, 2, 3, 4],#i for i in range(1,4)]
'reward_density': [0.25], # np.linspace(0.0, 1.0, num=5)
'make_denser': [False],
'terminal_state_density': [0.25], # np.linspace(0.1, 1.0, num=5)
'transition_noise': [0],#, 0.01, 0.02, 0.10, 0.25]
'reward_noise': [0],#, 1, 5, 10, 25] # Std dev. of normal dist.
'image_representations': [True],
'image_transforms': ['none'], #image_transforms, # ['shift', 'scale', 'flip', 'rotate', 'shift,scale,rotate,flip']
'image_width': [100],
'image_height': [100],
# One dummy seed per repeat; MUST stay the last entry (see module docstring).
'dummy_seed': [i for i in range(num_seeds)],
})

# Agent-side hyperparameter grid — the quantities actually being tuned here.
var_agent_configs = OrderedDict({
"learning_starts": [500, 1000, 2000],
"lr": [1e-3, 1e-4, 1e-5], # "lr": grid_search([1e-2, 1e-4, 1e-6]),
"n_step": [1,2,3,4],
"noisy": [True, False],
"target_network_update_freq": [8, 80, 800],
})


# Conv-filter stacks for various input sizes, given as Ray model-catalog
# specs: [out_channels, [kernel_h, kernel_w], stride].
# Output-size formula [(W−K+2P)/S]+1; for padding=same: P = ((S-1)*W - S + K)/2
filters_84x84 = [
[16, [8, 8], 4], # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
[32, [4, 4], 2], # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
[256, [11, 11], 1], # changes to 1x1x256 with padding 0 (or 3x3x256 for 100x100x1); this is the only layer with valid padding in Ray!
]

filters_100x100 = [
[16, [8, 8], 4], # changes from 84x84x1 with padding 4 to 22x22x16 (or 26x26x16 for 100x100x1)
[32, [4, 4], 2], # changes to 11x11x32 with padding 2 (or 13x13x32 for 100x100x1)
[64, [13, 13], 1], # changes to 1x1x64 with padding 0 (or 3x3x64 for 100x100x1); this is the only layer with valid padding in Ray!
]
# [num_outputs(=8 in this case), [1, 1], 1] conv2d appended by Ray always followed by a Dense layer with 1 output

# filters_99x99 = [
# [16, [8, 8], 4], # 51x51x16
# [32, [4, 4], 2],
# [64, [13, 13], 1],
# ]

# Same as filters_100x100 but with a wider (256-channel) final conv layer.
filters_100x100_large = [
[16, [8, 8], 4],
[32, [4, 4], 2],
[256, [13, 13], 1],
]

filters_50x50 = [
[16, [4, 4], 2],
[32, [4, 4], 2],
[64, [13, 13], 1],
]

filters_400x400 = [
[16, [32, 32], 16],
[32, [4, 4], 2],
[64, [13, 13], 1],
]


# Model-side grid: only the conv stack is varied in this experiment.
var_model_configs = OrderedDict({
"conv_filters": [filters_100x100, filters_100x100_large],
})

# Grouping of the three grids above; the launcher takes the cross product.
var_configs = OrderedDict({
"env": var_env_configs,
"agent": var_agent_configs,
"model": var_model_configs,
})

# Fixed environment settings shared by every run.
env_config = {
"env": "RLToy-v0",
"horizon": 100,
"env_config": {
'seed': 0, #seed
'state_space_type': 'discrete',
'action_space_type': 'discrete',
'generate_random_mdp': True,
'repeats_in_sequences': False,
'reward_scale': 1.0,
'completely_connected': True,
},
}

# Fixed agent settings (Ray DQN with Rainbow-style components enabled:
# double Q, dueling, distributional atoms, prioritized replay).
algorithm = "DQN"
agent_config = {
"adam_epsilon": 1e-4,
"buffer_size": 1000000,
"double_q": True,
"dueling": True,
"exploration_final_eps": 0.01,
"exploration_fraction": 0.1,
"schedule_max_timesteps": 20000,
# "hiddens": None,
"num_atoms": 10, # [5, 10, 20]
"prioritized_replay": True,
"prioritized_replay_alpha": 0.75, #
"prioritized_replay_beta": 0.4,
"final_prioritized_replay_beta": 1.0, #
"beta_annealing_fraction": 1.0, #

"sample_batch_size": 4,
"timesteps_per_iteration": 1000,
"train_batch_size": 32,
"min_iter_time_s": 0,
}


# Fixed model settings; 'conv_filters' is injected from var_model_configs by
# the launcher, so it is deliberately absent here.
model_config = {
"model": {
"fcnet_hiddens": [256, 256],
# "custom_preprocessor": "ohe",
"custom_options": {}, # extra options to pass to your preprocessor
"conv_activation": "relu",
# "no_final_linear": False,
# "vf_share_layers": True,
# "fcnet_activation": "tanh",
"use_lstm": False,
"max_seq_len": 20,
"lstm_cell_size": 256,
"lstm_use_prev_action_reward": False,
},
}

from ray import tune
# Evaluation settings: greedy (exploration disabled) rollouts with noise
# turned off so that eval stats measure the learned policy only.
eval_config = {
"evaluation_interval": 1, # I think this means every x training_iterations
"evaluation_config": {
"explore": False,
"exploration_fraction": 0,
"exploration_final_eps": 0,
"evaluation_num_episodes": 10,
"horizon": 100,
"env_config": {
"dummy_eval": True, #hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
'reward_noise': tune.function(lambda a: a.normal(0, 0)),
'action_loss_weight': 0.0,
}
},
}
2 changes: 1 addition & 1 deletion mdp_playground/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def join_files(file_prefix, file_suffix):
for i in range(stats_pd.shape[0] - 1):
if (
stats_pd["timesteps_total"].iloc[i]
> stats_pd["timesteps_total"].iloc[i + 1]
>= stats_pd["timesteps_total"].iloc[i + 1]
):
final_rows_for_a_config.append(i)

Expand Down
Loading