
Commit e540813

Merge pull request #1 from RaghuSpaceRajan/mdpp_plots
Merge Suresh and Jessica's changes into experimental branch
2 parents 7694f98 + 87b1484 commit e540813


9 files changed: +1646 −197 lines


config_readme.md

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@

# Configuration Files

Configuration files are an easy way to define the experiments to run in mdp-playground. They are passed to run_experiments.py as a command-line argument, using either `-c` or `--config-file`. The example below shows how to run the sequence-length and delay experiment for DQN, specifying the configuration file on the command line. The configuration file must be a Python file.

```
run_experiments.py -c ./experiments/dqn_seq_del.py
run_experiments.py --config-file ./experiments/dqn_seq_del.py
```

There are 2 types of configurations across the experiments:

- Variable Config and
- Static Config

## Variable configurations

Variable configurations let you define the variables whose impact you want to study. For instance, one might be interested in the effect on the agent's performance of varying the `sequence_length` and `delay` meta-features for the current experiment. In that case, `delay` and `sequence_length` would each be a key in the `var_env_configs` dict, and the corresponding value would be a *list of the values it can take*. A Cartesian product of these lists is then taken to generate all the configurations to be run (see the sketch after the example below).

```python
var_env_configs = OrderedDict({
    'state_space_size': [8],
    'action_space_size': [8],
    'delay': [0] + [2**i for i in range(4)],
    'sequence_length': [1, 2, 3, 4],
    'reward_density': [0.25],
    'make_denser': [False],
    'terminal_state_density': [0.25],
    'transition_noise': [0],
    'reward_noise': [0],
    'dummy_seed': [i for i in range(num_seeds)]
})

var_configs = OrderedDict({
    "env": var_env_configs,
})
```
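
As a minimal sketch of the Cartesian-product expansion (plain Python using `itertools.product`, not mdp-playground's internal machinery; the helper name `expand_configs` is illustrative only):

```python
from collections import OrderedDict
from itertools import product

# Hypothetical helper: expand a var_env_configs-style OrderedDict into the list of
# concrete configurations given by the Cartesian product of its value lists.
def expand_configs(var_env_configs):
    keys = list(var_env_configs.keys())
    return [dict(zip(keys, values)) for values in product(*var_env_configs.values())]

var_env_configs = OrderedDict({
    'delay': [0, 1, 2],
    'sequence_length': [1, 2],
})
print(len(expand_configs(var_env_configs)))  # 3 * 2 = 6 configurations
print(expand_configs(var_env_configs)[0])    # {'delay': 0, 'sequence_length': 1}
```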

Variable configurations can be specified for the environment, the agent or the model of the current experiment, via the OrderedDicts `var_env_configs`, `var_agent_configs` and `var_model_configs` respectively.

Because Ray does not have a common way of specifying configurations across its agents, we offer the utility to set `var_agent_configs` and `var_model_configs` in the same way as `var_env_configs`.

```python
var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs,
    "model": var_model_configs
})
```
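
For instance, a `var_agent_configs` dict follows the same key-to-list-of-values pattern; the sketch below mirrors the `learning_starts` sweep from `experiments/dqn_vanilla_hps.py` further down this diff (which keys are accepted depends on the Ray agent being configured):

```python
from collections import OrderedDict

# Keys are Ray agent config options; values are the lists of settings to sweep over.
var_agent_configs = OrderedDict({
    'learning_starts': [200, 500, 1000, 5000, 10000],
})
```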

Please see the sample experiment config files in the experiments directory for how to set these values for a given algorithm.

## Static Configurations

`env_config`, `agent_config` and `model_config` are normal Python dicts that hold the static configuration for the current experiment.

**Example**:

```python
env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0,
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}
```

Please look into the code for details.

experiments/dqn_vanilla_hps.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@

```python
num_seeds = 10
from collections import OrderedDict

var_env_configs = OrderedDict({
    'state_space_size': [8],  # , 10, 12, 14] # [2**i for i in range(1,6)]
    'action_space_size': [8],  # 2, 4, 8, 16] # [2**i for i in range(1,6)]
    'delay': [0],
    'sequence_length': [1],  # i for i in range(1,4)]
    'reward_density': [0.25],  # np.linspace(0.0, 1.0, num=5)
    'make_denser': [False],
    'terminal_state_density': [0.25],  # np.linspace(0.1, 1.0, num=5)
    'transition_noise': [0],  # , 0.01, 0.02, 0.10, 0.25]
    'reward_noise': [0],  # , 1, 5, 10, 25] # Std dev. of normal dist.
    'dummy_seed': [i for i in range(num_seeds)],
})

var_agent_configs = OrderedDict({
    # normalise obs./reward, atari preprocessing
    'learning_starts': [200, 500, 1000, 5000, 10000],
    # 'target_network_update_freq': [200, 400, 800, 1600, 3200],
    # 'train_batch_size': [4, 8, 16, 32, 64, 128],
})

var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs
})

env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0,
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}

algorithm = "DQN"
agent_config = {
    "adam_epsilon": 1e-4,
    "beta_annealing_fraction": 1.0,
    "buffer_size": 1000000,
    "double_q": False,
    "dueling": False,
    "exploration_final_eps": 0.01,
    "exploration_fraction": 0.1,
    "final_prioritized_replay_beta": 1.0,
    "hiddens": None,
    "learning_starts": 1000,
    "lr": 1e-4,
    "n_step": 1,
    "noisy": False,
    "num_atoms": 1,
    "prioritized_replay": False,
    "prioritized_replay_alpha": 0.5,
    # "sample_batch_size": 4,
    "rollout_fragment_length": 4,
    "schedule_max_timesteps": 20000,
    "target_network_update_freq": 800,
    "timesteps_per_iteration": 1000,
    "min_iter_time_s": 0,
    "train_batch_size": 32,
}

model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}

from ray import tune
eval_config = {
    "evaluation_interval": 1,  # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        "horizon": 100,
        "env_config": {
            # hack: used to check if we are in evaluation mode or training mode inside
            # the Ray callback on_episode_end(), to be able to write eval stats
            "dummy_eval": True,
            'transition_noise': 0 if "state_space_type" in env_config["env_config"]
                                  and env_config["env_config"]["state_space_type"] == "discrete"
                                  else tune.function(lambda a: a.normal(0, 0)),
            'reward_noise': tune.function(lambda a: a.normal(0, 0)),
            'action_loss_weight': 0.0,
        },
    },
}
```
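
Following the usage described in config_readme.md above, this config file would presumably be passed to run_experiments.py in the same way as the other experiment configs:

```
run_experiments.py -c ./experiments/dqn_vanilla_hps.py
```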

latex_generation.ipynb

Lines changed: 177 additions & 0 deletions
@@ -0,0 +1,177 @@

The notebook (nbformat 4, Python 3 kernel) adds three code cells plus a trailing empty cell; the cells are shown below as Python for readability.

Cell 1:

```python
from pylatex import Document, NoEscape, StandAloneGraphic, Figure, SubFigure, Command
import pylatex
```

Cell 2:

```python
def generate_figure_section(doc, fig_data):
    '''Generates latex code related to figure (with optional sub figures) section

    Parameters
    ----------
    fig_data : dict<str,str>
        eg: fig_data = {
            // figure details
            'fig_caption': r'<fig_caption>',
            'fig_caption_option': r'<fig_caption_option>',
            'fig_label': r'<fig_label>',

            [OPTIONAL] 'sub_figs': [
                {
                    // sub-figure details
                }
            ],

            [OPTIONAL] 'img_file': '<img_file_path>',
        }
    '''

    fig_caption = fig_data['fig_caption']
    fig_caption_option = fig_data['fig_caption_option']

    figure = Figure(position='!h')
    with doc.create(figure) as fig:
        # add centering tag
        fig.append(Command('centering'))

        if 'sub_figs' in fig_data.keys():
            sub_figs = fig_data['sub_figs']
            sub_fig_width = r'{0:0.2f}\textwidth'.format(1 / len(sub_figs))

            # iterate per sub figure
            for sub_fig_data in sub_figs:
                sub_fig_caption = sub_fig_data['fig_caption']
                sub_fig_caption_option = sub_fig_data['fig_caption_option']
                sub_fig_img_file = sub_fig_data['img_file']

                # if label is not provided, use image file name instead
                if 'fig_label' in sub_fig_data.keys():
                    sub_fig_label = sub_fig_data['fig_label']
                else:
                    sub_fig_label = sub_fig_img_file.replace('figures/', 'fig:').replace('.pdf', '')

                sub_figure = SubFigure(position='b', width=NoEscape(sub_fig_width))
                with doc.create(sub_figure) as sub_fig:
                    # add centering tag
                    sub_fig.append(Command('centering'))

                    # add sub figure image
                    sub_fig.add_image(sub_fig_img_file, width=NoEscape(r'\textwidth'))
                    # sub_fig.append(StandAloneGraphic(image_options='width=' + NoEscape(sub_fig_img_width), filename=sub_fig_img_file))

                    # add sub figure caption
                    sub_fig.append(Command('caption', NoEscape(sub_fig_caption), NoEscape(sub_fig_caption_option)))

                    # add sub figure label
                    sub_fig.append(Command('label', NoEscape(sub_fig_label)))

        # add figure image
        if 'img_file' in fig_data.keys():
            fig_img_file = fig_data['img_file']
            fig.add_image(fig_img_file, width=NoEscape(r'\textwidth'))
            # fig.append(StandAloneGraphic(image_options='width=' + NoEscape(fig_img_width), filename=fig_img_file))

        # if label is not provided, use image file name instead
        if 'fig_label' in fig_data.keys():
            fig_label = fig_data['fig_label']
        else:
            fig_label = fig_img_file.replace('figures/', 'fig:').replace('.pdf', '')

        # add figure caption
        fig.append(Command('caption', NoEscape(fig_caption), NoEscape(fig_caption_option)))

        # add figure label
        fig.append(Command('label', NoEscape(fig_label)))
```

Cell 3:

```python
save_file_name = './testing'
doc = Document(save_file_name)

# sample example
fig_data = {
    'fig_caption': r'{\small Training Learning Curves for DQN \textbf{when varying delay and sequence lengths}. Please note the different colorbar scales.}',
    'fig_caption_option': r'DQN',
    'fig_label': r'fig:dqn_seq_del_train_curves',

    'img_file': 'figures/dqn_seq_del_train_learning_curves_episode_reward_mean.pdf',
}
generate_figure_section(doc, fig_data)

# sample example with sub figures
sub_fig_data = {
    'fig_caption': r'Mean episodic reward (limited to 100 timesteps) at the end of training for three different tabular baseline algorithms \textbf{when varying reward delay}. Error bars represent 1 standard deviation.',
    'fig_caption_option': r' Mean episodic reward at the end of training ',
    'fig_label': r'fig:tabular_delay',

    'sub_figs': [
        {
            'fig_caption': r'{\small Q-Learning}',
            'fig_caption_option': r'Q-Learning',
            'fig_label': r'fig:q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d',
            'img_file': 'figures/q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
        },
        {
            'img_file': 'figures/sarsa_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
            'fig_caption': r'{\small SARSA}',
            'fig_caption_option': r'SARSA',
        },
        {
            'img_file': 'figures/double_q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
            'fig_caption': r'{\small Double Q-Learning}',
            'fig_caption_option': r'Double Q-Learning',
        },
    ],
}
generate_figure_section(doc, sub_fig_data)

doc.generate_tex()
```
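
The final cell writes `./testing.tex` via `doc.generate_tex()`. As a usage sketch (not part of this commit; it assumes pylatex is installed and a LaTeX engine such as pdflatex is on the PATH), the same `Document` could also be compiled to PDF directly:

```python
from pylatex import Document

doc = Document('./testing')
# ... build the figures with generate_figure_section(doc, fig_data) as in the notebook ...
doc.generate_pdf(clean_tex=False)  # compiles testing.pdf and keeps testing.tex alongside it
```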
