
Commit e540813

Merge pull request #1 from RaghuSpaceRajan/mdpp_plots
Merge Suresh and Jessica's changes into experimental branch
2 parents 7694f98 + 87b1484 commit e540813


9 files changed: +1646 −197 lines


config_readme.md

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@

# Configuration Files

Configuration files are an easy way to define the experiments to run in mdp-playground. They are passed to run_experiments.py as a command-line argument, using either `-c` or `--config-file`. The example below shows how to run the sequence-length and delay experiment for DQN, specifying the configuration file on the command line. The configuration file must be a Python file.

```
run_experiments.py -c ./experiments/dqn_seq_del.py
run_experiments.py --config-file ./experiments/dqn_seq_del.py
```

There are 2 types of configurations across the experiments:

- Variable Config and
- Static Config

## Variable configurations

Variable configurations let you define the variables whose impact you want to study. For instance, one might be interested in the effect on the agent's performance of varying the `sequence_length` and `delay` meta-features for the current experiment. In that case, `delay` and `sequence_length` would each be a key in the `var_env_configs` dict, and the corresponding value would be a *list of the values it can take*. A Cartesian product of these lists is then taken to generate all the configurations to be run (see the sketch after the example below).

```python
var_env_configs = OrderedDict({
    'state_space_size': [8],
    'action_space_size': [8],
    'delay': [0] + [2**i for i in range(4)],
    'sequence_length': [1, 2, 3, 4],
    'reward_density': [0.25],
    'make_denser': [False],
    'terminal_state_density': [0.25],
    'transition_noise': [0],
    'reward_noise': [0],
    'dummy_seed': [i for i in range(num_seeds)]
})

var_configs = OrderedDict({
    "env": var_env_configs,
})
```
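
As a minimal sketch of the Cartesian-product expansion (plain Python using `itertools.product`, not mdp-playground's internal machinery; the helper name `expand_configs` is illustrative only):

```python
from collections import OrderedDict
from itertools import product

# Hypothetical helper: expand a var_env_configs-style OrderedDict into the list of
# concrete configurations given by the Cartesian product of its value lists.
def expand_configs(var_env_configs):
    keys = list(var_env_configs.keys())
    return [dict(zip(keys, values)) for values in product(*var_env_configs.values())]

var_env_configs = OrderedDict({
    'delay': [0, 1, 2],
    'sequence_length': [1, 2],
})
print(len(expand_configs(var_env_configs)))  # 3 * 2 = 6 configurations
print(expand_configs(var_env_configs)[0])    # {'delay': 0, 'sequence_length': 1}
```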

Variable configurations can be specified for the environment, the agent or the model of the current experiment, via the OrderedDicts `var_env_configs`, `var_agent_configs` and `var_model_configs` respectively.

Because Ray does not have a common way of specifying configurations across its agents, we offer the utility to set `var_agent_configs` and `var_model_configs` in the same way as `var_env_configs`.

```python
var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs,
    "model": var_model_configs
})
```
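
For instance, a `var_agent_configs` dict follows the same key-to-list-of-values pattern; the sketch below mirrors the `learning_starts` sweep from `experiments/dqn_vanilla_hps.py` further down this diff (which keys are accepted depends on the Ray agent being configured):

```python
from collections import OrderedDict

# Keys are Ray agent config options; values are the lists of settings to sweep over.
var_agent_configs = OrderedDict({
    'learning_starts': [200, 500, 1000, 5000, 10000],
})
```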

Please see the sample experiment config files in the experiments directory for how to set these values for a given algorithm.

## Static Configurations

`env_config`, `agent_config` and `model_config` are normal Python dicts that hold the static configuration for the current experiment.

**Example**:

```python
env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0,
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}
```

Please look into the code for details.

experiments/dqn_vanilla_hps.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@

```python
num_seeds = 10
from collections import OrderedDict

var_env_configs = OrderedDict({
    'state_space_size': [8],  # , 10, 12, 14] # [2**i for i in range(1,6)]
    'action_space_size': [8],  # 2, 4, 8, 16] # [2**i for i in range(1,6)]
    'delay': [0],
    'sequence_length': [1],  # i for i in range(1,4)]
    'reward_density': [0.25],  # np.linspace(0.0, 1.0, num=5)
    'make_denser': [False],
    'terminal_state_density': [0.25],  # np.linspace(0.1, 1.0, num=5)
    'transition_noise': [0],  # , 0.01, 0.02, 0.10, 0.25]
    'reward_noise': [0],  # , 1, 5, 10, 25] # Std dev. of normal dist.
    'dummy_seed': [i for i in range(num_seeds)],
})

var_agent_configs = OrderedDict({
    # normalise obs./reward, atari preprocessing
    'learning_starts': [200, 500, 1000, 5000, 10000],
    # 'target_network_update_freq': [200, 400, 800, 1600, 3200],
    # 'train_batch_size': [4, 8, 16, 32, 64, 128],
})

var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs
})

env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0,
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}

algorithm = "DQN"
agent_config = {
    "adam_epsilon": 1e-4,
    "beta_annealing_fraction": 1.0,
    "buffer_size": 1000000,
    "double_q": False,
    "dueling": False,
    "exploration_final_eps": 0.01,
    "exploration_fraction": 0.1,
    "final_prioritized_replay_beta": 1.0,
    "hiddens": None,
    "learning_starts": 1000,
    "lr": 1e-4,
    "n_step": 1,
    "noisy": False,
    "num_atoms": 1,
    "prioritized_replay": False,
    "prioritized_replay_alpha": 0.5,
    # "sample_batch_size": 4,
    "rollout_fragment_length": 4,
    "schedule_max_timesteps": 20000,
    "target_network_update_freq": 800,
    "timesteps_per_iteration": 1000,
    "min_iter_time_s": 0,
    "train_batch_size": 32,
}

model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}

from ray import tune
eval_config = {
    "evaluation_interval": 1,  # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        "horizon": 100,
        "env_config": {
            # hack: used to check if we are in evaluation mode or training mode inside
            # the Ray callback on_episode_end(), to be able to write eval stats
            "dummy_eval": True,
            'transition_noise': 0 if "state_space_type" in env_config["env_config"]
                                  and env_config["env_config"]["state_space_type"] == "discrete"
                                  else tune.function(lambda a: a.normal(0, 0)),
            'reward_noise': tune.function(lambda a: a.normal(0, 0)),
            'action_loss_weight': 0.0,
        },
    },
}
```
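
Following the usage described in config_readme.md above, this config file would presumably be passed to run_experiments.py in the same way as the other experiment configs:

```
run_experiments.py -c ./experiments/dqn_vanilla_hps.py
```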

latex_generation.ipynb

Lines changed: 177 additions & 0 deletions
@@ -0,0 +1,177 @@

The notebook (nbformat 4, Python 3 kernel) adds three code cells plus a trailing empty cell; the cells are shown below as Python for readability.

Cell 1:

```python
from pylatex import Document, NoEscape, StandAloneGraphic, Figure, SubFigure, Command
import pylatex
```

Cell 2:

```python
def generate_figure_section(doc, fig_data):
    '''Generates latex code related to figure (with optional sub figures) section

    Parameters
    ----------
    fig_data : dict<str,str>
        eg: fig_data = {
            // figure details
            'fig_caption': r'<fig_caption>',
            'fig_caption_option': r'<fig_caption_option>',
            'fig_label': r'<fig_label>',

            [OPTIONAL] 'sub_figs': [
                {
                    // sub-figure details
                }
            ],

            [OPTIONAL] 'img_file': '<img_file_path>',
        }
    '''

    fig_caption = fig_data['fig_caption']
    fig_caption_option = fig_data['fig_caption_option']

    figure = Figure(position='!h')
    with doc.create(figure) as fig:
        # add centering tag
        fig.append(Command('centering'))

        if 'sub_figs' in fig_data.keys():
            sub_figs = fig_data['sub_figs']
            sub_fig_width = r'{0:0.2f}\textwidth'.format(1 / len(sub_figs))

            # iterate per sub figure
            for sub_fig_data in sub_figs:
                sub_fig_caption = sub_fig_data['fig_caption']
                sub_fig_caption_option = sub_fig_data['fig_caption_option']
                sub_fig_img_file = sub_fig_data['img_file']

                # if label is not provided, use image file name instead
                if 'fig_label' in sub_fig_data.keys():
                    sub_fig_label = sub_fig_data['fig_label']
                else:
                    sub_fig_label = sub_fig_img_file.replace('figures/', 'fig:').replace('.pdf', '')

                sub_figure = SubFigure(position='b', width=NoEscape(sub_fig_width))
                with doc.create(sub_figure) as sub_fig:
                    # add centering tag
                    sub_fig.append(Command('centering'))

                    # add sub figure image
                    sub_fig.add_image(sub_fig_img_file, width=NoEscape(r'\textwidth'))
                    # sub_fig.append(StandAloneGraphic(image_options='width=' + NoEscape(sub_fig_img_width), filename=sub_fig_img_file))

                    # add sub figure caption
                    sub_fig.append(Command('caption', NoEscape(sub_fig_caption), NoEscape(sub_fig_caption_option)))

                    # add sub figure label
                    sub_fig.append(Command('label', NoEscape(sub_fig_label)))

        # add figure image
        if 'img_file' in fig_data.keys():
            fig_img_file = fig_data['img_file']
            fig.add_image(fig_img_file, width=NoEscape(r'\textwidth'))
            # fig.append(StandAloneGraphic(image_options='width=' + NoEscape(fig_img_width), filename=fig_img_file))

        # if label is not provided, use image file name instead
        if 'fig_label' in fig_data.keys():
            fig_label = fig_data['fig_label']
        else:
            fig_label = fig_img_file.replace('figures/', 'fig:').replace('.pdf', '')

        # add figure caption
        fig.append(Command('caption', NoEscape(fig_caption), NoEscape(fig_caption_option)))

        # add figure label
        fig.append(Command('label', NoEscape(fig_label)))
```

Cell 3:

```python
save_file_name = './testing'
doc = Document(save_file_name)

# sample example
fig_data = {
    'fig_caption': r'{\small Training Learning Curves for DQN \textbf{when varying delay and sequence lengths}. Please note the different colorbar scales.}',
    'fig_caption_option': r'DQN',
    'fig_label': r'fig:dqn_seq_del_train_curves',

    'img_file': 'figures/dqn_seq_del_train_learning_curves_episode_reward_mean.pdf',
}
generate_figure_section(doc, fig_data)

# sample example with sub figures
sub_fig_data = {
    'fig_caption': r'Mean episodic reward (limited to 100 timesteps) at the end of training for three different tabular baseline algorithms \textbf{when varying reward delay}. Error bars represent 1 standard deviation.',
    'fig_caption_option': r' Mean episodic reward at the end of training ',
    'fig_label': r'fig:tabular_delay',

    'sub_figs': [
        {
            'fig_caption': r'{\small Q-Learning}',
            'fig_caption_option': r'Q-Learning',
            'fig_label': r'fig:q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d',
            'img_file': 'figures/q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
        },
        {
            'img_file': 'figures/sarsa_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
            'fig_caption': r'{\small SARSA}',
            'fig_caption_option': r'SARSA',
        },
        {
            'img_file': 'figures/double_q_learn_tabular_del_train_final_reward_delay_episode_reward_mean_1d.pdf',
            'fig_caption': r'{\small Double Q-Learning}',
            'fig_caption_option': r'Double Q-Learning',
        },
    ],
}
generate_figure_section(doc, sub_fig_data)

doc.generate_tex()
```
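
The final cell writes `./testing.tex` via `doc.generate_tex()`. As a usage sketch (not part of this commit; it assumes pylatex is installed and a LaTeX engine such as pdflatex is on the PATH), the same `Document` could also be compiled to PDF directly:

```python
from pylatex import Document

doc = Document('./testing')
# ... build the figures with generate_figure_section(doc, fig_data) as in the notebook ...
doc.generate_pdf(clean_tex=False)  # compiles testing.pdf and keeps testing.tex alongside it
```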
