Commit b9ad471

committed May 30, 2021
merged experimental into dev/jan
2 parents 6fe8ace + 3a17423

27 files changed: +1417 −424 lines
 

‎README.md

+7 −7

@@ -133,12 +133,12 @@ To plot results from experiments, run `jupyter-notebook` and open [`plot_experim
 If you use MDP Playground in your work, please cite the following paper:

 ```bibtex
-@article{rajan2019mdp,
-    title={MDP Playground: Meta-Features in Reinforcement Learning},
-    author={Raghu Rajan and Frank Hutter},
-    year={2019},
-    eprint={1909.07750},
-    archivePrefix={arXiv},
-    primaryClass={cs.LG}
+@article{rajan2020mdp,
+    title={MDP Playground: Controlling Dimensions of Hardness in Reinforcement Learning},
+    author={Raghu Rajan and Jessica Lizeth Borja Diaz and Suresh Guttikonda and Fabio Ferreira and André Biedenkapp and Frank Hutter},
+    year={2020},
+    eprint={1909.07750},
+    archivePrefix={arXiv},
+    primaryClass={cs.LG}
 }
 ```

‎default_config.py

−5

@@ -76,8 +76,3 @@
         "lstm_use_prev_action_reward": False,
     },
 }
-
-varying_configs = get_grid_of_configs(var_configs)
-# print("VARYING_CONFIGS:", varying_configs)
-
-final_configs = combined_processing(env_config, agent_config, model_config, eval_config, varying_configs=varying_configs, framework='ray', algorithm='SAC')
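The grid expansion and combined processing removed here presumably now happen inside mdp_playground's config processing when run_experiments.py loads a config. A minimal sketch of the assumed grid expansion (the names get_grid_of_configs and var_configs are taken from the removed lines; the real implementation may differ):

```python
import itertools
from collections import OrderedDict

def get_grid_of_configs_sketch(var_configs):
    # Cartesian product over the leaf lists of a {"env": {...}, "agent": {...}} dict,
    # mirroring the assertion used in the experiment config files of this commit.
    value_tuples = []
    for config_type, config_dict in var_configs.items():
        for key in config_dict:
            assert isinstance(config_dict[key], list), \
                "leaf values should be lists of the values each option can take"
            value_tuples.append(config_dict[key])
    return list(itertools.product(*value_tuples))

var_configs = OrderedDict({
    "env": OrderedDict({"delay": [0, 1], "sequence_length": [1, 2, 3]}),
    "agent": OrderedDict({"lr": [1e-2, 1e-4]}),
})
print(len(get_grid_of_configs_sketch(var_configs)))  # 2 * 3 * 2 = 12 configs
```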

‎docs/_autosummary/mdp_playground.envs.mujoco_env_wrapper.rst

Whitespace-only changes.

‎docs/_autosummary/mdp_playground.spaces.test_image_multi_discrete.rst

Whitespace-only changes.

‎example.py

+2 −2

@@ -25,7 +25,7 @@ def discrete_environment_example():
     config["seed"] = 0

     config["state_space_type"] = "discrete"
-    config["state_space_size"] = 8
+    config["action_space_size"] = 8
     config["delay"] = 1
     config["sequence_length"] = 3
     config["reward_scale"] = 2.5
@@ -59,7 +59,7 @@ def discrete_environment_image_representations_example():
     config["seed"] = 0

     config["state_space_type"] = "discrete"
-    config["state_space_size"] = 8
+    config["action_space_size"] = 8
     config["image_representations"] = True
     config["delay"] = 1
     config["sequence_length"] = 3
+97 (new file)

@@ -0,0 +1,97 @@
timesteps_total = 20_000
num_seeds = 10
from collections import OrderedDict
var_env_configs = OrderedDict({
    'state_space_size': [8],#, 10, 12, 14] # [2**i for i in range(1,6)]
    'action_space_size': [8],#2, 4, 8, 16] # [2**i for i in range(1,6)]
    'delay': [0],
    'sequence_length': [1],#i for i in range(1,4)]
    'reward_density': [0.25], # np.linspace(0.0, 1.0, num=5)
    'make_denser': [False],
    'terminal_state_density': [0.25], # np.linspace(0.1, 1.0, num=5)
    'transition_noise': [0, 0.01, 0.02, 0.10, 0.25],
    'reward_noise': [0], # Std dev. of normal dist.
    'dummy_seed': [i for i in range(num_seeds)],
})

import numpy as np
var_agent_configs = OrderedDict({
    "lr": list(np.power(10.,np.linspace(-1, -6, 16))), # "lr": grid_search([1e-2, 1e-4, 1e-6]),
})


var_configs = OrderedDict({
    "env": var_env_configs,
    "agent": var_agent_configs,
})

env_config = {
    "env": "RLToy-v0",
    "horizon": 100,
    "env_config": {
        'seed': 0, #seed
        'state_space_type': 'discrete',
        'action_space_type': 'discrete',
        'generate_random_mdp': True,
        'repeats_in_sequences': False,
        'reward_scale': 1.0,
        'completely_connected': True,
    },
}

algorithm = "DQN"
agent_config = {
    "adam_epsilon": 1e-4,
    "beta_annealing_fraction": 1.0,
    "buffer_size": 20_000,
    "double_q": False,
    "dueling": False,
    "exploration_final_eps": 0.01,
    "exploration_fraction": 0.1,
    "final_prioritized_replay_beta": 1.0,
    "hiddens": None,
    "learning_starts": 1000,
    # "lr": 1e-4, # "lr": grid_search([1e-2, 1e-4, 1e-6]),
    "n_step": 1,
    "noisy": False,
    "num_atoms": 1,
    "prioritized_replay": False,
    "prioritized_replay_alpha": 0.5,
    "sample_batch_size": 4,
    "schedule_max_timesteps": 20000,
    "target_network_update_freq": 800,
    "timesteps_per_iteration": 1000,
    "min_iter_time_s": 0,
    "train_batch_size": 32,
}

model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        "custom_preprocessor": "ohe",
        "custom_options": {}, # extra options to pass to your preprocessor
        "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}

from ray import tune
eval_config = {
    "evaluation_interval": 1, # I think this means every x training_iterations
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        "horizon": 100,
        "env_config": {
            "dummy_eval": True, #hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
            'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
            'reward_noise': tune.function(lambda a: a.normal(0, 0)),
            'action_loss_weight': 0.0,
        }
    },
}
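A back-of-the-envelope check (not part of the repo) of how many runs the varied dimensions above expand to once their Cartesian product is taken:

```python
import numpy as np

transition_noise = [0, 0.01, 0.02, 0.10, 0.25]
dummy_seed = list(range(10))                       # num_seeds = 10
lr = list(np.power(10., np.linspace(-1, -6, 16)))
# All other env dimensions have a single value, so they contribute a factor of 1.
print(len(transition_noise) * len(dummy_seed) * len(lr))  # 5 * 10 * 16 = 800 configs
```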
+98
@@ -0,0 +1,98 @@
1+
timesteps_total = 20_000
2+
num_seeds = 10
3+
from collections import OrderedDict
4+
var_env_configs = OrderedDict({
5+
'state_space_size': [8],#, 10, 12, 14] # [2**i for i in range(1,6)]
6+
'action_space_size': [8],#2, 4, 8, 16] # [2**i for i in range(1,6)]
7+
'delay': [0],
8+
'sequence_length': [1],#i for i in range(1,4)]
9+
'reward_density': [0.25], # np.linspace(0.0, 1.0, num=5)
10+
'make_denser': [False],
11+
'terminal_state_density': [0.25], # np.linspace(0.1, 1.0, num=5)
12+
'transition_noise': [0],
13+
'reward_noise': [0, 1, 5, 10, 25], # Std dev. of normal dist.
14+
'dummy_seed': [i for i in range(num_seeds)],
15+
})
16+
17+
import numpy as np
18+
var_agent_configs = OrderedDict({
19+
"lr": [1e-4] #list(np.power(10.,np.linspace(-1, -6, 16))), # "lr": grid_search([1e-2, 1e-4, 1e-6]),
20+
})
21+
22+
23+
var_configs = OrderedDict({
24+
"env": var_env_configs,
25+
"agent": var_agent_configs,
26+
})
27+
28+
env_config = {
29+
"env": "RLToy-v0",
30+
"horizon": 100,
31+
"env_config": {
32+
'seed': 0, #seed
33+
'state_space_type': 'discrete',
34+
'action_space_type': 'discrete',
35+
'generate_random_mdp': True,
36+
'repeats_in_sequences': False,
37+
'reward_scale': 1.0,
38+
'completely_connected': True,
39+
},
40+
}
41+
42+
algorithm = "DQN"
43+
agent_config = {
44+
"adam_epsilon": 1e-4,
45+
"beta_annealing_fraction": 1.0,
46+
"buffer_size": 20_000,
47+
'clip_rewards': False,
48+
"double_q": False,
49+
"dueling": False,
50+
"exploration_final_eps": 0.01,
51+
"exploration_fraction": 0.1,
52+
"final_prioritized_replay_beta": 1.0,
53+
"hiddens": None,
54+
"learning_starts": 1000,
55+
# "lr": 1e-4, # "lr": grid_search([1e-2, 1e-4, 1e-6]),
56+
"n_step": 1,
57+
"noisy": False,
58+
"num_atoms": 1,
59+
"prioritized_replay": False,
60+
"prioritized_replay_alpha": 0.5,
61+
"sample_batch_size": 4,
62+
"schedule_max_timesteps": 20000,
63+
"target_network_update_freq": 800,
64+
"timesteps_per_iteration": 1000,
65+
"min_iter_time_s": 0,
66+
"train_batch_size": 32,
67+
}
68+
69+
model_config = {
70+
"model": {
71+
"fcnet_hiddens": [256, 256],
72+
"custom_preprocessor": "ohe",
73+
"custom_options": {}, # extra options to pass to your preprocessor
74+
"fcnet_activation": "tanh",
75+
"use_lstm": False,
76+
"max_seq_len": 20,
77+
"lstm_cell_size": 256,
78+
"lstm_use_prev_action_reward": False,
79+
},
80+
}
81+
82+
from ray import tune
83+
eval_config = {
84+
"evaluation_interval": 1, # I think this means every x training_iterations
85+
"evaluation_config": {
86+
"explore": False,
87+
"exploration_fraction": 0,
88+
"exploration_final_eps": 0,
89+
"evaluation_num_episodes": 10,
90+
"horizon": 100,
91+
"env_config": {
92+
"dummy_eval": True, #hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
93+
'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
94+
'reward_noise': tune.function(lambda a: a.normal(0, 0)),
95+
'action_loss_weight': 0.0,
96+
}
97+
},
98+
}
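The eval_config blocks in the experiment files above disable noise during evaluation by passing zero-std callables via tune.function. A small standalone illustration of that convention (the callable receives the env's np_random and returns a noise sample; the std values here are made up):

```python
import numpy as np

rng = np.random.RandomState(0)
train_reward_noise = lambda a: a.normal(0, 5)   # e.g. one of the stds swept above
eval_reward_noise = lambda a: a.normal(0, 0)    # the eval override: always returns 0.0
print(train_reward_noise(rng), eval_reward_noise(rng))
```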

‎experiments/dqn_space_invaders_r_noise.py

+3 −3

@@ -3,7 +3,7 @@
 from collections import OrderedDict
 import numpy as np
 var_env_configs = OrderedDict({
-    'reward_noise': list(np.array([0, 1, 5, 10, 25])/100), # Std dev. of normal dist.
+    'reward_noise': list(np.array([0, 1, 5, 10, 25, 50, 100, 200, 400, 800])/100), # Std dev. of normal dist.
     'dummy_seed': [i for i in range(num_seeds)],
 })

@@ -35,15 +35,15 @@
 agent_config = { # Taken from Ray tuned_examples
     'adam_epsilon': 0.00015,
     'buffer_size': 500000,
-    'clip_rewards': True,
+    'clip_rewards': False,
     'double_q': False,
     'dueling': False,
     'exploration_config': { 'epsilon_timesteps': 200000,
                             'final_epsilon': 0.01},
     'final_prioritized_replay_beta': 1.0,
     'hiddens': [512],
     'learning_starts': 20000,
-    'lr': 6.25e-05,
+    # 'lr': 6.25e-05,
     'n_step': 1,
     'noisy': False,
     'num_atoms': 1,
+123 (new file)
@@ -0,0 +1,123 @@
1+
num_seeds = 5
2+
timesteps_total = 10_000_000
3+
from collections import OrderedDict
4+
import numpy as np
5+
var_env_configs = OrderedDict({
6+
'reward_noise': list(np.array([0])/100), # Std dev. of normal dist.
7+
'dummy_seed': [i for i in range(num_seeds)],
8+
})
9+
10+
import numpy as np
11+
var_agent_configs = OrderedDict({
12+
"lr": list(np.power(10.,np.linspace(-1, -6, 11))), # "lr": grid_search([1e-2, 1e-4, 1e-6]),
13+
})
14+
15+
var_configs = OrderedDict({
16+
"env": var_env_configs,
17+
"agent": var_agent_configs,
18+
})
19+
20+
env_config = {
21+
"env": "GymEnvWrapper-Atari",
22+
"env_config": {
23+
"AtariEnv": {
24+
"game": "space_invaders",
25+
'obs_type': 'image',
26+
'frameskip': 1,
27+
},
28+
# "GymEnvWrapper": {
29+
"atari_preprocessing": True,
30+
'frame_skip': 4,
31+
'grayscale_obs': False,
32+
'state_space_type': 'discrete',
33+
'action_space_type': 'discrete',
34+
'seed': 0,
35+
# },
36+
# 'seed': 0, #seed
37+
},
38+
}
39+
40+
algorithm = "DQN"
41+
agent_config = { # Taken from Ray tuned_examples
42+
'adam_epsilon': 0.00015,
43+
'buffer_size': 500000,
44+
'clip_rewards': False,
45+
'double_q': False,
46+
'dueling': False,
47+
'exploration_config': { 'epsilon_timesteps': 200000,
48+
'final_epsilon': 0.01},
49+
'final_prioritized_replay_beta': 1.0,
50+
'hiddens': [512],
51+
'learning_starts': 20000,
52+
# 'lr': 6.25e-05,
53+
'n_step': 1,
54+
'noisy': False,
55+
'num_atoms': 1,
56+
'num_gpus': 0,
57+
'num_workers': 3,
58+
'prioritized_replay': False,
59+
'prioritized_replay_alpha': 0.5,
60+
'prioritized_replay_beta_annealing_timesteps': 2000000,
61+
'rollout_fragment_length': 4,
62+
'target_network_update_freq': 8000,
63+
'timesteps_per_iteration': 10000,
64+
'train_batch_size': 32,
65+
"tf_session_args": {
66+
# note: overriden by `local_tf_session_args`
67+
"intra_op_parallelism_threads": 4,
68+
"inter_op_parallelism_threads": 4,
69+
# "gpu_options": {
70+
# "allow_growth": True,
71+
# },
72+
# "log_device_placement": False,
73+
"device_count": {
74+
"CPU": 2
75+
},
76+
# "allow_soft_placement": True, # required by PPO multi-gpu
77+
},
78+
# Override the following tf session args on the local worker
79+
"local_tf_session_args": {
80+
"intra_op_parallelism_threads": 4,
81+
"inter_op_parallelism_threads": 4,
82+
},
83+
84+
}
85+
86+
87+
model_config = {
88+
# "model": {
89+
# "fcnet_hiddens": [256, 256],
90+
# "fcnet_activation": "tanh",
91+
# "use_lstm": False,
92+
# "max_seq_len": 20,
93+
# "lstm_cell_size": 256,
94+
# "lstm_use_prev_action_reward": False,
95+
# },
96+
}
97+
98+
from ray import tune
99+
eval_config = {
100+
"evaluation_interval": None, # I think this means every x training_iterations
101+
"evaluation_config": {
102+
"explore": False,
103+
"exploration_fraction": 0,
104+
"exploration_final_eps": 0,
105+
"evaluation_num_episodes": 10,
106+
# "horizon": 100,
107+
"env_config": {
108+
"dummy_eval": True, #hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
109+
'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
110+
'reward_noise': tune.function(lambda a: a.normal(0, 0)),
111+
'action_loss_weight': 0.0,
112+
}
113+
},
114+
}
115+
value_tuples = []
116+
for config_type, config_dict in var_configs.items():
117+
for key in config_dict:
118+
assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
119+
value_tuples.append(var_configs[config_type][key])
120+
121+
import itertools
122+
cartesian_product_configs = list(itertools.product(*value_tuples))
123+
print("Total number of configs. to run:", len(cartesian_product_configs))

‎experiments/dqn_test_expt.py

−3

@@ -1,6 +1,3 @@
-from mdp_playground.config_processor import *
-
-# framework = 'ray'
 timesteps_total = 10_000
 num_seeds = 10
 from collections import OrderedDict

‎experiments/rainbow_hydra.py

+15 −129

@@ -1,12 +1,10 @@
 num_seeds = 1
 timesteps_total = 20_000
-num_agent_configs = 1000
-num_prob_inst = 1000
+num_configs = 1000

-import numpy as np
 from collections import OrderedDict

-var_env_configs = OrderedDict({
+sobol_env_configs = OrderedDict({
     'action_space_size': (8,),#, 10, 12, 14] # [2**i for i in range(1,6)]
     # 'action_space_size': (64),#2, 4, 8, 16] # [2**i for i in range(1,6)]
     'delay': "cat, " + str([i for i in range(11)]), # + [2**i for i in range(4)],
@@ -17,84 +15,13 @@
     'terminal_state_density': (0.25,), # np.linspace(0.1, 1.0, num=5)
     'reward_dist': "float, [0.01, 0.8]",
     'reward_scale': "float, log, [0.1, 100]",
-    'dummy_seed': (0,), #"cat, " + str([i for i in range(num_seeds)]),
+    'dummy_seed': (0,), # "cat, " + str([i for i in range(num_seeds)]), #seed
 })


-print(var_env_configs)
-cartesian_product_configs = []
-def sobol_configs_from_config_dict(config_dict):
-    '''
-    '''
+print(sobol_env_configs)

-    num_dims = 0
-    for key in config_dict:
-        val = config_dict[key]
-        if type(val) == tuple: # i.e. a constant value
-            pass
-        else: # i.e. a variable value
-            num_dims += 1
-
-    print("Generating sobol sequence with " + str(num_prob_inst) + " and " + str(num_dims) + " dimensions:")
-
-    from scipy.optimize._shgo_lib.sobol_seq import Sobol # Only generates real vectors in range 0 to 1 per dimension
-    import json
-    sobol_gen = Sobol()
-    sobol = sobol_gen.i4_sobol_generate(num_dims, num_prob_inst, skip=0)
-    print(sobol)
-
-    for sample in sobol:
-        # print(sample)
-        cartesian_product_configs.append({}) # new config
-        j = 0
-        for key in config_dict:
-            val = config_dict[key]
-            if type(val) == tuple: # i.e. a constant value
-                cartesian_product_configs[-1][key] = val[0]
-            # The rest are config spaces for param settings
-            elif "int" in val:
-                lower = float(val.split("[")[1].split(",")[0].strip())
-                upper = float(val.split("]")[0].split(",")[-1].strip())
-                log = True if "log" in val else False
-                #TODO log vals
-                sobol_val = lower + (upper - lower) * sample[j]
-                cartesian_product_configs[-1][key] = int(sobol_val)
-                j += 1
-            elif "float" in val:
-                lower = float(val.split("[")[1].split(",")[0].strip())
-                upper = float(val.split("]")[0].split(",")[-1].strip())
-                log = True if "log" in val else False
-                if log:
-                    lower = np.log(lower)
-                    upper = np.log(upper)
-                sobol_val = lower + (upper - lower) * sample[j]
-                if log:
-                    sobol_val = np.exp(sobol_val)
-                if key == "reward_dist":
-                    sobol_val = [sobol_val, 1.0]
-                cartesian_product_configs[-1][key] = sobol_val
-                j += 1
-            elif "cat" in val:
-                choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
-                len_c = len(choices)
-                if sample[j] == 1.0: #TODO remove? Don't know if sobol samples include 1.0
-                    sample[j] -= 1e-10
-                index = int(sample[j] * len_c)
-                cartesian_product_configs[-1][key] = choices[index]
-                j += 1
-
-
-
-sobol_configs_from_config_dict(var_env_configs)
-# import pprint
-# pp = pprint.PrettyPrinter(indent=4)
-
-for i, conf in enumerate(cartesian_product_configs):
-    cartesian_product_configs[i] = tuple(conf.values()) #hack
-    # print(conf)
-    # pp.pprint(cartesian_product_configs[i])
-
-var_agent_configs = OrderedDict({
+random_agent_configs = OrderedDict({

     "lr": "float, log, [1e-5, 1e-3]", # 1e-4
     "learning_starts": "int, [1, 2000]", # 500
@@ -107,53 +34,22 @@ def sobol_configs_from_config_dict(config_dict):

 })

-var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above.
+random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.

-def create_config_space_from_config_dict(config_dict):
-    '''
-    '''
-    import ConfigSpace as CS
-    cs = CS.ConfigurationSpace(seed=1234)
-    import ConfigSpace.hyperparameters as CSH
-    import json

-    for key in config_dict:
-        val = config_dict[key]
-        if "int" in val:
-            lower = int(val.split("[")[1].split(",")[0].strip())
-            upper = int(val.split("]")[0].split(",")[-1].strip())
-            log = True if "log" in val else False
-            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter(name=key, lower=lower, upper=upper, log=log))
-        elif "float" in val:
-            lower = float(val.split("[")[1].split(",")[0].strip())
-            upper = float(val.split("]")[0].split(",")[-1].strip())
-            log = True if "log" in val else False
-            cs.add_hyperparameter(CSH.UniformFloatHyperparameter(name=key, lower=lower, upper=upper, log=log))
-        elif "cat" in val:
-            choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
-            cs.add_hyperparameter(CSH.CategoricalHyperparameter(name=key, choices=choices))
-            # print(type(CSH.CategoricalHyperparameter(name=key, choices=choices).choices[0]))
+random_configs = OrderedDict({
+    "env": {},
+    "agent": random_agent_configs,

-    return cs
+})

-cs = create_config_space_from_config_dict(var_agent_configs)
-print("Agent variable ConfigSpace:")
-print(cs)
-random_configs = cs.sample_configuration(size=num_agent_configs)
-# print("type(random_configs):", type(random_configs))
-for i in range(len(random_configs)):
-    # if random_configs[i].get_dictionary()["train_batch_size"] == 4 \
-    # and random_configs[i].get_dictionary()["buffer_size"] < 33:
-    # print("Config:", i, "train_batch_size, buffer_size:", random_configs[i].get_dictionary()["train_batch_size"], random_configs[i].get_dictionary()["buffer_size"])
-    random_configs[i] = tuple(random_configs[i].get_dictionary().values()) #hack ####TODO Change run_experiments.py and here to directly pass whole config dict to run_experiments.py. Would need to replace in every config.py file.
-# print(random_configs)

-var_configs = OrderedDict({
-    "env": var_env_configs,
-    "agent": var_agent_configs,
+sobol_configs = OrderedDict({
+    "env": sobol_env_configs,

 })

+
 env_config = {
     "env": "RLToy-v0",
     "horizon": 100,
@@ -163,7 +59,7 @@ def create_config_space_from_config_dict(config_dict):
         'action_space_type': 'discrete',
         'generate_random_mdp': True,
         'repeats_in_sequences': False,
-        'reward_scale': 1.0,
+        # 'reward_scale': 1.0,
         'completely_connected': True,
     },
 }
@@ -226,7 +122,7 @@ def create_config_space_from_config_dict(config_dict):
         "custom_preprocessor": "ohe",
         "custom_options": {}, # extra options to pass to your preprocessor
         "fcnet_activation": "tanh",
-        "use_lstm": False,
+        # "use_lstm": False,
         "max_seq_len": 20,
         "lstm_cell_size": 256,
         "lstm_use_prev_action_reward": False,
@@ -250,13 +146,3 @@ def create_config_space_from_config_dict(config_dict):
         }
     },
 }
-
-# value_tuples = []
-# for config_type, config_dict in var_configs.items():
-#     for key in config_dict:
-#         assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
-#         value_tuples.append(var_configs[config_type][key])
-#
-# import itertools
-# cartesian_product_configs = list(itertools.product(*value_tuples))
-# print("Total number of configs. to run:", len(cartesian_product_configs))

‎experiments/rainbow_hydra_qbert.py

+11 −22

@@ -2,27 +2,18 @@
 timesteps_total = 10_000_000
 from collections import OrderedDict

-var_env_configs = OrderedDict({
+sobol_env_configs = OrderedDict({
     'delay': [0],
     'dummy_seed': [i for i in range(num_seeds)],
 })

-var_configs = OrderedDict({
-    "env": var_env_configs
-})
-
-value_tuples = []
-for config_type, config_dict in var_configs.items():
-    for key in config_dict:
-        assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
-        value_tuples.append(var_configs[config_type][key])
+sobol_configs = OrderedDict({
+    "env": sobol_env_configs

-import itertools
-cartesian_product_configs = list(itertools.product(*value_tuples))
-print("Total number of grid configs. to run:", len(cartesian_product_configs))
+})


-var_agent_configs = OrderedDict({
+random_agent_configs = OrderedDict({

     "lr": "float, log, [1e-5, 1e-3]", # 1e-4
     "learning_starts": "int, [1, 2000]", # 500
@@ -35,7 +26,7 @@

 })

-var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because saved configs used below as random_configs are ordered alphabetically.
+random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.

 random_configs = \
 [(1.86e-12, 1480, 0.0697, 311, 0.000545, 8, 1845, 64), # top 10 configs begin from here
@@ -59,14 +50,12 @@
 (0.0133, 6541, 0.218, 1393, 1.21e-05, 1, 3, 16),
 (0.0515, 507, 0.48100000000000004, 1866, 1.23e-05, 3, 136, 128)]

-for i in range(len(random_configs)):
-    random_configs[i] = tuple(random_configs[i]) ##IMP I think these are tuples because cartesian_product_configs by default has tuples.

-var_configs = OrderedDict({
-    "env": var_env_configs,
-    "agent": var_agent_configs,
-
-})
+# var_configs = OrderedDict({
+#     "env": var_env_configs,
+#     "agent": var_agent_configs,
+#
+# })

 env_config = {
     "env": "GymEnvWrapper-Atari",
+145 (new file)
@@ -0,0 +1,145 @@
1+
num_seeds = 1
2+
timesteps_total = 10_000_000
3+
num_configs = 100
4+
5+
from collections import OrderedDict
6+
7+
random_agent_configs = OrderedDict({
8+
9+
"lr": "float, log, [1e-5, 1e-3]", # 1e-4
10+
"learning_starts": "int, [10, 20000]", # 500
11+
"target_network_update_freq": "int, log, [10, 10000]", # 800,
12+
"exploration_fraction": "float, [0.01, 0.99]", # 0.1,
13+
"n_step": "int, [1, 16]", # 1
14+
"buffer_size": "int, log, [333, 500000]", # ?? 1000000, # Sizes up to 32 crashed with Ray 0.7.3 (but not always!), size 16 did not crash with Ray 0.9.0dev
15+
"adam_epsilon": "float, log, [1e-12, 1e-1]", # ?? 1e-4,
16+
"train_batch_size": "cat, [4, 8, 16, 32, 64, 128]", # 32,
17+
18+
})
19+
20+
random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.
21+
22+
23+
random_configs = OrderedDict({
24+
"env": {},
25+
"agent": random_agent_configs,
26+
27+
})
28+
29+
30+
# These are currently needed to write dummy_seed to stats CSV. A seed column is
31+
# needed for data loading
32+
33+
sobol_env_configs = OrderedDict({
34+
'dummy_seed': (0,), # "cat, " + str([i for i in range(num_seeds)]), #seed
35+
})
36+
37+
# print(sobol_env_configs)
38+
39+
sobol_configs = OrderedDict({
40+
"env": sobol_env_configs,
41+
42+
})
43+
44+
45+
env_config = {
46+
"env": "GymEnvWrapper-Atari",
47+
"env_config": {
48+
"AtariEnv": {
49+
"game": 'qbert',
50+
'obs_type': 'image',
51+
'frameskip': 1,
52+
},
53+
# "GymEnvWrapper": {
54+
"atari_preprocessing": True,
55+
'frame_skip': 4,
56+
'grayscale_obs': False,
57+
'state_space_type': 'discrete',
58+
'action_space_type': 'discrete',
59+
'seed': 0,
60+
# },
61+
# 'seed': 0, #seed
62+
},
63+
}
64+
65+
algorithm = "DQN"
66+
agent_config = {
67+
# "adam_epsilon": 1e-4,
68+
# "buffer_size": 1000000,
69+
"double_q": True,
70+
"dueling": True,
71+
# "lr": 1e-3,
72+
"exploration_final_eps": 0.01,
73+
# "exploration_fraction": 0.1,
74+
"schedule_max_timesteps": 10_000_000,
75+
# "learning_starts": 500,
76+
# "target_network_update_freq": 800,
77+
# "n_step": 4,
78+
"noisy": False,
79+
"num_atoms": 10, # [5, 10, 20]
80+
"prioritized_replay": True,
81+
"prioritized_replay_alpha": 0.75, #
82+
"prioritized_replay_beta": 0.4,
83+
"final_prioritized_replay_beta": 1.0, #
84+
"beta_annealing_fraction": 1.0, #
85+
# "hiddens": None,
86+
'hiddens': [512],
87+
88+
"sample_batch_size": 4,
89+
"timesteps_per_iteration": 10000,
90+
# "train_batch_size": 32,
91+
"min_iter_time_s": 0,
92+
93+
'num_gpus': 0,
94+
"num_workers": 3, # extra workers I think
95+
# "num_cpus_for_driver": 2,
96+
97+
"tf_session_args": {
98+
# note: overriden by `local_tf_session_args`
99+
"intra_op_parallelism_threads": 4,
100+
"inter_op_parallelism_threads": 4,
101+
# "gpu_options": {
102+
# "allow_growth": True,
103+
# },
104+
# "log_device_placement": False,
105+
"device_count": {
106+
"CPU": 2
107+
},
108+
# "allow_soft_placement": True, # required by PPO multi-gpu
109+
},
110+
# Override the following tf session args on the local worker
111+
"local_tf_session_args": {
112+
"intra_op_parallelism_threads": 4,
113+
"inter_op_parallelism_threads": 4,
114+
},
115+
116+
}
117+
118+
model_config = {
119+
# "model": {
120+
# "fcnet_hiddens": [256, 256],
121+
# "fcnet_activation": "tanh",
122+
# "use_lstm": False,
123+
# "max_seq_len": 20,
124+
# "lstm_cell_size": 256,
125+
# "lstm_use_prev_action_reward": False,
126+
# },
127+
}
128+
129+
from ray import tune
130+
eval_config = {
131+
"evaluation_interval": None, # I think this means every x training_iterations
132+
"evaluation_config": {
133+
"explore": False,
134+
"exploration_fraction": 0,
135+
"exploration_final_eps": 0,
136+
"evaluation_num_episodes": 10,
137+
"horizon": 100,
138+
"env_config": {
139+
"dummy_eval": True, #hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
140+
'transition_noise': 0 if "state_space_type" in env_config["env_config"] and env_config["env_config"]["state_space_type"] == "discrete" else tune.function(lambda a: a.normal(0, 0)),
141+
'reward_noise': tune.function(lambda a: a.normal(0, 0)),
142+
'action_loss_weight': 0.0,
143+
}
144+
},
145+
}
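The random_agent_configs ranges above are presumably sampled with ConfigSpace, as in the helper removed from experiments/rainbow_hydra.py in this same commit. A minimal sketch of that sampling (float/log case only, for two of the ranges above):

```python
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace(seed=1234)
cs.add_hyperparameter(CSH.UniformFloatHyperparameter(name="lr", lower=1e-5, upper=1e-3, log=True))
cs.add_hyperparameter(CSH.UniformFloatHyperparameter(name="adam_epsilon", lower=1e-12, upper=1e-1, log=True))

random_configs = cs.sample_configuration(size=3)
# Keys come back alphabetically ordered, which is why the dict above is sorted first.
print([tuple(c.get_dictionary().values()) for c in random_configs])
```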

‎mdp_playground/analysis/analysis.py

+24 −11

@@ -43,7 +43,9 @@ def load_data(self, experiments: dict, load_eval=True, exp_type='grid'):
             list_exp_data.append(exp_data)
         return list_exp_data

-    def get_exp_data(self, dir_name, exp_name, exp_type='grid', num_metrics=3, load_eval=True, threshold=0.05, sample_freq=1): #, max_total_configs=200):
+    def get_exp_data(self, dir_name, exp_name, exp_type='grid', num_metrics=3,
+                     load_eval=True, threshold=0.05, sample_freq=1):
+        #, max_total_configs=200):
         '''Get training and evaluation data from a single set of recorded CSV stats files.


@@ -147,29 +149,37 @@ def join_files(file_prefix, file_suffix):

         config_counts = []
         dims_values = []
-        #Keep only config_names that we wan't to measure
-        #traning iteration is always first, metrics are always last.
+        # Keep only config_names that we want to measure
+        # traning iteration is always first, metrics are always last.
         self.full_config_names = col_names.copy()
         full_config_names = self.full_config_names
        full_config_names.remove("training_iteration")

-        # mean_vals = [ np.mean(stats_pd.loc[stats_pd['target_network_update_freq'] == val]["episode_reward_mean"])
-        #               for val in stats_pd["target_network_update_freq"].unique() ]

         #config counts includes seed
-        self.seed_idx = -1
+        self.seed_idx = None # seed used to be fixed as the last, i.e.,
+        # quickest varying dimension in the <experiment config>.py file's
+        # config space because then all runs on a single env would be recorded
+        # consecutively in the stats CSV
+        self.ts_idx = None
         for i, c in enumerate(full_config_names[:-num_metrics]):
             dims_values.append(stats_pd[c].unique())
             config_counts.append(stats_pd[c].nunique())
-            if("seed" in c): ##TODO this will just set seed index to be the "last" column name with seed in it.
+            if("seed" in c): # ##TODO this will just set seed index to be
+                # the "last" column name with seed in it.
                 self.seed_idx = i
+            if c == "timesteps_total":
+                self.ts_idx = i

-        config_counts.append(num_metrics) #hardcoded number of training stats that were recorded
+
+        config_counts.append(num_metrics) # #hardcoded number of training
+        # stats that were recorded
         config_counts = tuple(config_counts)
         self.metric_names = full_config_names[-num_metrics:]
         self.config_names = full_config_names[:-num_metrics]

-        # Slice into training stats and get end of training stats for individual training runs in the experiment
+        # Slice into training stats and get end of training stats for
+        # individual training runs in the experiment
         final_rows_for_a_config = []
         previous_i = 0
         list_of_learning_curves = []
@@ -365,6 +375,7 @@ def join_files(file_prefix, file_suffix):
         exp_data['eval_aucs'] = eval_aucs

         # related to plots
+        # #TODO Remove the self from these since they are per expt. variables?
         exp_data['metric_names'] = self.metric_names
         exp_data['tick_labels'] = self.tick_labels
         exp_data['axis_labels'] = self.axis_labels
@@ -376,6 +387,7 @@ def join_files(file_prefix, file_suffix):
         exp_data['config_names'] = self.config_names
         exp_data['dims_values'] = self.dims_values
         exp_data['seed_idx'] = self.seed_idx
+        exp_data['ts_idx'] = self.ts_idx

         return exp_data

@@ -684,6 +696,7 @@ def plot_learning_curves(self, list_exp_data, save_fig=False, train=True, metric
         #HACK
         if len(list_exp_data) > 0:
             exp_data = list_exp_data[0] #TODO make changes to handle multiple experiments plot
+            warnings.warn("Using only 1st expt. for the foll. plots")
         else:
             return

@@ -722,10 +735,10 @@ def plot_learning_curves(self, list_exp_data, save_fig=False, train=True, metric
             j_index = (i//nseeds_) % ncols_ #
             if i == 0:
                 to_plot_ = stats_data[0:final_rows_for_a_config[i]+1, metric_num]
-                to_plot_x = stats_data[0:final_rows_for_a_config[i]+1,-3]
+                to_plot_x = stats_data[0:final_rows_for_a_config[i]+1, exp_data['ts_idx']]
             else:
                 to_plot_ = stats_data[final_rows_for_a_config[i-1]+1:final_rows_for_a_config[i]+1, metric_num]
-                to_plot_x = stats_data[final_rows_for_a_config[i-1]+1:final_rows_for_a_config[i]+1, -3]
+                to_plot_x = stats_data[final_rows_for_a_config[i-1]+1:final_rows_for_a_config[i]+1, exp_data['ts_idx']]
             # print(to_plot_[-1])
             # if i % 10 == 0:
             #     fig = plt.figure(figsize=(12, 7))

‎mdp_playground/config_processor/config_processor.py

+353-104
Large diffs are not rendered by default.

‎mdp_playground/envs/gym_env_wrapper.py

+7 −3

@@ -63,9 +63,13 @@ def __init__(self, env, **config):
            self.transition_noise = lambda a: 0.0

        if "reward_noise" in config:
-            self.reward_noise = config["reward_noise"]
+            if callable(config["reward_noise"]):
+                self.reward_noise = config["reward_noise"]
+            else:
+                reward_noise_std = config["reward_noise"]
+                self.reward_noise = lambda a: a.normal(0, reward_noise_std)
        else:
-            self.reward_noise = lambda a: 0.0
+            self.reward_noise = None

        if "wrap_deepmind_ray" in config and config["wrap_deepmind_ray"]: #hack ##TODO remove?
            self.env = wrap_deepmind(self.env, dim=42, framestack=True)
@@ -195,7 +199,7 @@ def step(self, action):
        # print("rewards:", self.reward_buffer, old_reward, reward)
        del self.reward_buffer[0]

-        noise_in_reward = self.reward_noise(self.np_random) #random ###TODO Would be better to parameterise this in terms of state, action and time_step as well. Would need to change implementation to have a queue for the rewards achieved and then pick the reward that was generated delay timesteps ago.
+        noise_in_reward = self.reward_noise(self.np_random) if self.reward_noise else 0 #random ###TODO Would be better to parameterise this in terms of state, action and time_step as well. Would need to change implementation to have a queue for the rewards achieved and then pick the reward that was generated delay timesteps ago.
        self.total_abs_noise_in_reward_episode += np.abs(noise_in_reward)
        self.total_reward_episode += reward
        reward += noise_in_reward
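A standalone restatement of the new reward_noise handling above (mirroring the wrapper logic rather than importing it): a plain float is now interpreted as the std of Gaussian noise, a callable is used as-is, and absence of the key means no noise.

```python
import numpy as np

def make_reward_noise(config):
    if "reward_noise" not in config:
        return None
    if callable(config["reward_noise"]):
        return config["reward_noise"]
    std = config["reward_noise"]
    return lambda rng: rng.normal(0, std)

rng = np.random.RandomState(0)
for cfg in ({}, {"reward_noise": 0.5}, {"reward_noise": lambda r: r.normal(0, 2)}):
    noise_fn = make_reward_noise(cfg)
    print(noise_fn(rng) if noise_fn else 0)
```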

‎mdp_playground/envs/rl_toy_env.py

+81-105
Large diffs are not rendered by default.

‎mdp_playground/spaces/__init__.py

+3 −1

@@ -2,6 +2,8 @@
 from mdp_playground.spaces.box_extended import BoxExtended
 from mdp_playground.spaces.multi_discrete_extended import MultiDiscreteExtended
 from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete
+from mdp_playground.spaces.image_continuous import ImageContinuous
 from mdp_playground.spaces.tuple_extended import TupleExtended

-__all__ = ["BoxExtended", "DiscreteExtended", "MultiDiscreteExtended", "ImageMultiDiscrete", "TupleExtended"]
+__all__ = ["BoxExtended", "DiscreteExtended", "MultiDiscreteExtended",\
+           "ImageMultiDiscrete", "ImageContinuous", "TupleExtended"]
mdp_playground/spaces/image_continuous.py

+194 (new file)

@@ -0,0 +1,194 @@
import warnings
import numpy as np
import gym
from gym.spaces import Box, Space
import PIL.ImageDraw as ImageDraw
import PIL.Image as Image
from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM
import os

class ImageContinuous(Box):
    '''A space that maps a continuous 1- or 2-D space 1-to-1 to images so that the
    images may be used as representations for corresponding continuous environments.

    Methods
    -------
    get_concatenated_image(continuous_obs)
        Gets an image representation for a given feature space observation
    '''

    def __init__(self, feature_space, term_spaces=None, width=100, height=100,\
                 circle_radius=5, target_point=None, relevant_indices=[0,1],\
                 seed=None, use_custom_images=None, cust_path=None, dtype=np.uint8):
        '''
        Parameters
        ----------
        feature_space : Gym.spaces.Box
            The feature space to which this class associates images as external
            observations
        term_spaces : list of Gym.spaces.Box
            Sub-spaces of the feature space which are terminal
        width : int
            The width of the image
        height : int
            The height of the image
        circle_radius : int
            The radius of the circle which represents the agent and target point
        target_point : np.array

        relevant_indices : list

        seed : int
            Seed for this space
        '''
        # ##TODO Define a common superclass for this and ImageMultiDiscrete
        self.feature_space = feature_space
        assert (self.feature_space.high != np.inf).any()
        assert (self.feature_space.low != -np.inf).any()
        self.width = width
        self.height = height
        # Warn if resolution is too low?
        self.circle_radius = circle_radius
        self.target_point = target_point
        self.term_spaces = term_spaces
        self.relevant_indices = relevant_indices
        all_indices = set(range(self.feature_space.shape[0]))
        self.irrelevant_indices = list(all_indices - set(self.relevant_indices))
        if len(self.irrelevant_indices) == 0:
            self.irrelevant_features = False
        else:
            self.irrelevant_features = True

        self.goal_colour = (0, 255, 0)
        self.agent_colour = (0, 0, 255)
        self.term_colour = (0, 0, 0)

        assert len(feature_space.shape) == 1
        relevant_dims = len(relevant_indices)
        irr_dims = len(self.irrelevant_indices)
        assert relevant_dims <= 2 and irr_dims <=2, "Image observations are "\
                                                    "supported only "\
                                                    "for 1- or 2-D feature spaces."


        # Shape has 1 appended for Ray Rllib to be compatible IIRC
        super(ImageContinuous, self).__init__(shape=(width, height, 1), \
                                              dtype=dtype, low=0, high=255)
        super(ImageContinuous, self).seed(seed=seed)

        if self.target_point is not None:
            self.target_point_pixel = self.convert_to_pixel(target_point)


    def generate_image(self, position, relevant=True):
        '''
        Parameters
        ----------
        position : np.array

        '''
        # Use RGB
        image_ = Image.new("RGB", (self.width, self.height), color=(255,255,255))
        # Use L for black and white 8-bit pixels instead of RGB in case not
        # using custom images
        # image_ = Image.new("L", (self.width, self.height))
        draw = ImageDraw.Draw(image_)

        # Draw term_spaces first, so that others are drawn over it
        if self.term_spaces is not None and relevant:
            for term_space in self.term_spaces:
                low = self.convert_to_pixel(term_space.low)
                high = self.convert_to_pixel(term_space.high)

                leftUpPoint = tuple((low))
                rightDownPoint = tuple((high))
                twoPointList = [leftUpPoint, rightDownPoint]
                draw.rectangle(twoPointList, fill=self.term_colour)

        R = self.circle_radius

        if self.target_point is not None and relevant:
            # print("draw2", self.target_point_pixel)
            leftUpPoint = tuple((self.target_point_pixel - R))
            rightDownPoint = tuple((self.target_point_pixel + R))
            twoPointList = [leftUpPoint, rightDownPoint]
            draw.ellipse(twoPointList, fill=self.goal_colour)

        pos_pixel = self.convert_to_pixel(position)
        # print("draw1", pos_pixel)
        # Draw circle https://stackoverflow.com/a/2980931/11063709
        leftUpPoint = tuple(pos_pixel - R)
        rightDownPoint = tuple(pos_pixel + R)
        twoPointList = [leftUpPoint, rightDownPoint]
        draw.ellipse(twoPointList, fill=self.agent_colour)




        # Because numpy is row-major and Image is column major, need to transpose
        # ret_arr = np.array(image_).T # For 2-D
        ret_arr = np.transpose(np.array(image_), axes=(1, 0, 2))

        return ret_arr

    def get_concatenated_image(self, obs):
        '''Gets the "stitched together" image made from images corresponding to
        each continuous sub-space within the continuous space, concatenated
        along the X-axis.
        '''
        concatenated_image = []
        # For relevant/irrelevant sub-spaces:
        concatenated_image.append(self.generate_image(obs[self.relevant_indices]))
        if self.irrelevant_features:
            irr_image = self.generate_image(obs[self.irrelevant_indices], relevant=False)
            concatenated_image.append(irr_image)

        concatenated_image = np.concatenate(tuple(concatenated_image), axis=0)

        return np.atleast_3d(concatenated_image) # because Ray expects an
        # image to have >=3 dims

    def convert_to_pixel(self, position):
        '''
        '''
        # It's implicit that both relevant and irrelevant sub-spaces have the
        # same max and min here:
        max = self.feature_space.high[self.relevant_indices]
        min = self.feature_space.low[self.relevant_indices]
        pos_pixel = ((position - min) / (max - min))
        pos_pixel = (pos_pixel * self.shape[:2]).astype(int)

        return pos_pixel


    def sample(self):

        sampled = self.feature_space.sample()
        return self.get_concatenated_image(sampled)

    def __repr__(self):
        return "{} with continuous underlying space of shape: {} and "\
               "images of resolution: {} and dtype: {}".format(self.__class__,\
               self.feature_space.shape,\
               self.shape, self.dtype)

    def contains(self, x):
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        if x.shape == (self.width, self.height, 1): #TODO compare each pixel for all possible images?
            return True

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        raise NotImplementedError

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        raise NotImplementedError

    def __eq__(self, other):
        raise NotImplementedError
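A quick standalone rework of the convert_to_pixel arithmetic above; the bounds and resolution follow the values used in the test file below, and the numbers are illustrative only.

```python
import numpy as np

low, high = np.array([0.0, 0.0]), np.array([20.0, 20.0])   # feature_space bounds
width, height = 400, 400                                    # image resolution

position = np.array([5.0, 7.0])
pos_unit = (position - low) / (high - low)                  # -> [0.25, 0.35]
pos_pixel = (pos_unit * np.array([width, height])).astype(int)
print(pos_pixel)                                            # [100 140]
```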

‎mdp_playground/spaces/image_multi_discrete.py

+8 −5

@@ -16,7 +16,7 @@ class ImageMultiDiscrete(Box):
        Gets an image representation for a given multi_discrete_state
    '''

-    def __init__(self, state_space_sizes, width=100, height=100, circle_radius=20, transforms='rotate,flip,scale,shift', sh_quant=1, scale_range=(0.5,1.5), ro_quant=1, seed=None, use_custom_images=None, cust_path=None): # , polygon_sides=4
+    def __init__(self, state_space_sizes, width=100, height=100, circle_radius=20, transforms='rotate,flip,scale,shift', sh_quant=1, scale_range=(0.5,1.5), ro_quant=1, seed=None, use_custom_images=None, cust_path=None, dtype=np.uint8): # , polygon_sides=4
        '''
        Parameters
        ----------
@@ -84,7 +84,7 @@ def __init__(self, state_space_sizes, width=100, height=100, circle_radius=20, t


        # self.shape = (width, height, 1)
-        super(ImageMultiDiscrete, self).__init__(shape=(width, height, 1), dtype=np.int64, low=0, high=255) #
+        super(ImageMultiDiscrete, self).__init__(shape=(width, height, 1), dtype=dtype, low=0, high=255) #
        super(ImageMultiDiscrete, self).seed(seed=seed) #

    # def seed(self, seed=None):
@@ -214,21 +214,24 @@ def get_concatenated_image(self, multi_discrete_state,):
            # concatenated_image.append(self.disjoint_states[i][multi_discrete_state[i]])
        concatenated_image = np.concatenate(tuple(concatenated_image), axis=0)

-        return concatenated_image[..., np.newaxis] # because Ray expects an image to have >=3 dims
+        return np.atleast_3d(concatenated_image) # because Ray expects an image to have >=3 dims

    # def get_multi_discrete_state(self,

    def sample(self):
        sss = np.array(self.state_space_sizes)
-        sampled = (self.np_random.random_sample(sss.shape) * sss).astype(np.int64) # Based on Gym's MultiDiscrete sampling
+        sampled = (self.np_random.random_sample(sss.shape) * sss).astype(self.dtype) # Based on Gym's MultiDiscrete sampling
        # if type(sampled) == int:
        #     sampled = [sampled]
        sampled = list(sampled)

        return self.get_concatenated_image(sampled)

    def __repr__(self):
-        return "ImageMultiDiscrete with multi-discrete space of shape: {} and images of resolution: {}".format(self.state_space_sizes, self.shape)
+        return "{} with multi-discrete space of shape: {} and "\
+               "images of resolution: {} and dtype: {}".format(self.__class__,\
+               self.state_space_sizes,\
+               self.shape, self.dtype)

    def contains(self, x):
        """
+62 (new file)

@@ -0,0 +1,62 @@
import unittest
import numpy as np
from mdp_playground.spaces.image_continuous import ImageContinuous
from gym.spaces import Box
# import PIL.ImageDraw as ImageDraw
import PIL.Image as Image


class TestImageContinuous(unittest.TestCase):

    def test_image_continuous(self):
        lows = 0.0
        highs = 20.0
        cs2 = Box(shape=(2,), low=lows, high=highs,)
        cs4 = Box(shape=(4,), low=lows, high=highs,)

        imc = ImageContinuous(cs2, width=400, height=400,)
        pos = np.array([5.0, 7.0])
        img1 = Image.fromarray(np.squeeze(imc.generate_image(pos)), 'RGB')
        img1.show()

        target = np.array([10, 10])
        imc = ImageContinuous(cs2, target_point=target, width=400, height=400,)
        img1 = Image.fromarray(np.squeeze(imc.generate_image(pos)), 'RGB')
        img1.show()

        # Terminal sub-spaces
        lows = np.array([2., 4.])
        highs = np.array([3., 6.])
        cs2_term1 = Box(low=lows, high=highs,)
        lows = np.array([12., 3.])
        highs = np.array([13., 4.])
        cs2_term2 = Box(low=lows, high=highs,)
        term_spaces = [cs2_term1, cs2_term2]

        target = np.array([10, 10])
        imc = ImageContinuous(cs2, target_point=target, term_spaces=term_spaces,\
                              width=400, height=400,)
        pos = np.array([5.0, 7.0])
        img1 = Image.fromarray(np.squeeze(imc.get_concatenated_image(pos)), 'RGB')
        img1.show()


        # Irrelevant features
        target = np.array([10, 10])
        imc = ImageContinuous(cs4, target_point=target, width=400, height=400,)
        pos = np.array([5.0, 7.0, 10.0, 15.0])
        img1 = Image.fromarray(np.squeeze(imc.get_concatenated_image(pos)), 'RGB')
        img1.show()
        # print(imc.get_concatenated_image(pos).shape)

        # Random sample and __repr__
        imc = ImageContinuous(cs4, target_point=target, width=400, height=400,)
        print(imc)
        img1 = Image.fromarray(np.squeeze(imc.sample()), 'RGB')
        img1.show()




if __name__ == '__main__':
    unittest.main()

‎mdp_playground/spaces/test_image_multi_discrete.py

+3 −1

@@ -1,6 +1,6 @@
 import unittest
 import numpy as np
-from gym.spaces.image_multi_discrete import ImageMultiDiscrete
+from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete
 from gym.spaces import Discrete, MultiDiscrete
 # import gym
 # from gym.spaces import MultiDiscrete
@@ -13,6 +13,8 @@ class TestImageMultiDiscrete(unittest.TestCase):

     def test_image_multi_discrete(self):
         ds4 = Discrete(4)
+        ds4 = [ds4.n]
+        print(ds4)
         imd = ImageMultiDiscrete(ds4, transforms='shift')
         from PIL import Image
         # img1 = Image.fromarray(imd.disjoint_states[0][1], 'L')

‎plot_experiments.ipynb

+93-4
@@ -448,7 +448,7 @@
448448
"source": [
449449
"# Save configs in list_exp_data_ (hacky variable name)\n",
450450
"import pickle\n",
451-
"pik = \"mdpp_hydra_reward_scales_pickle.dat\"\n",
451+
"pik = \"mdpp_hydra_configs_pickle.dat\"\n",
452452
"\n",
453453
"import os.path\n",
454454
"if not os.path.exists(pik):\n",
@@ -465,7 +465,7 @@
465465
"metadata": {},
466466
"outputs": [],
467467
"source": [
468-
"# Save configs in list_exp_data_reward_scales\n",
468+
"# Save reward_scales in list_exp_data_reward_scales\n",
469469
"import pickle\n",
470470
"pik = \"mdpp_hydra_reward_scales_pickle.dat\"\n",
471471
"\n",
@@ -529,6 +529,31 @@
529529
"del list_exp_data_reward_scales[259]"
530530
]
531531
},
532+
{
533+
"cell_type": "code",
534+
"execution_count": null,
535+
"metadata": {},
536+
"outputs": [],
537+
"source": [
538+
"contents = []\n",
539+
"for key in list_exp_data[0]:\n",
540+
" contents.append(key)\n",
541+
"print(contents)\n",
542+
"# print(list_exp_data[0]['train_stats'])\n",
543+
"print(len(list_exp_data[0]['dims_values']))\n",
544+
"\n",
545+
"print(len(list_exp_data_with_configs))\n",
546+
"print(list_exp_data_with_configs[0]['train_stats'].iloc[0,:])\n",
547+
"print(list_exp_data_with_configs[0]['train_stats'].iloc[1,:])\n",
548+
"\n",
549+
"# print(list_exp_data_with_configs[0]['train_stats']['learning_starts'])\n",
550+
"# learn_startss_mean = list_exp_data_with_configs[0]['train_stats']['learning_starts'].mean()\n",
551+
"# print(\"mean(learn_startss):\", learn_startss_mean)\n",
552+
"\n",
553+
"print(len(list_exp_data_reward_scales))\n",
554+
"# print(list_exp_data_reward_scales[0]['train_stats'])"
555+
]
556+
},
532557
{
533558
"cell_type": "code",
534559
"execution_count": null,
@@ -587,20 +612,24 @@
587612
"top_configs = {}\n",
588613
"top_configs_mins = {}\n",
589614
"perfs_all_envs = {}\n",
615+
"\n",
616+
"print(\"env x agent grid size:\", num_env_configs, num_agent_configs)\n",
590617
"for perf_set in perf_sets:\n",
591618
" top_configs[perf_set] = []\n",
592619
" top_configs_mins[perf_set] = []\n",
593620
" perfs_all_envs[perf_set] = np.zeros(shape=(num_env_configs, num_agent_configs))\n",
594621
"\n",
595622
"corrs = {}\n",
623+
"corrs_spm = {}\n",
596624
"import itertools\n",
597625
"corr_sets = ['train', 'eval', 'train_auc', 'eval_auc']\n",
598626
"corr_combos = list(itertools.combinations(corr_sets, 2))\n",
599627
"\n",
600628
"# corr_sets = ['train_eval', 'train_auc_eval_auc', 'eval_eval_auc', 'train_eval_auc', 'train_train_auc', 'eval_train_auc']\n",
601629
"for corr_combo in corr_combos:\n",
602630
" corrs[corr_combo[0] + ' and ' + corr_combo[1]] = []\n",
603-
"\n",
631+
" corrs_spm[corr_combo[0] + ' and ' + corr_combo[1]] = []\n",
632+
" \n",
604633
"for i in range(num_env_configs):\n",
605634
"# if i == 259:\n",
606635
"# continue\n",
@@ -627,13 +656,18 @@
627656
" for combo in corr_combos:\n",
628657
" corr_ = prs(perfs[combo[0]], perfs[combo[1]])[0]\n",
629658
" corrs[combo[0] + ' and ' + combo[1]].append(corr_)\n",
659+
" \n",
660+
" corr_ = spm(perfs[combo[0]], perfs[combo[1]])[0]\n",
661+
" corrs_spm[combo[0] + ' and ' + combo[1]].append(corr_)\n",
662+
" \n",
630663
"\n",
631664
"# corrs['train_eval']\n",
632665
"# corrs['train_auc_eval_auc'].append(prs(perfs['train_auc'], perfs['eval_auc']))\n",
633666
"# corrs['eval_eval_auc'].append(prs(perfs['eval'], perfs['eval_auc']))\n",
634667
"# corrs['train_eval_auc'].append(prs(perfs['train'], perfs['eval_auc']))\n",
635668
"# corrs['train_train_auc'].append(prs(perfs['train'], perfs['train_auc']))\n",
636669
"# corrs['eval_train_auc'].append(prs(perfs['eval'], perfs['train_auc']))\n",
670+
"\n",
637671
"\n"
638672
]
639673
},
@@ -734,6 +768,8 @@
734768
"\n",
735769
"\n",
736770
" print(\"Final portfolio:\", portfolio[perf_set])\n",
771+
" print(\"Final portfolio perf.:\", np.sum(hydra_perfs[perf_set]))\n",
772+
" print(\"Oracle perf.:\", sum_over_maxes[perf_set][0])\n",
737773
" print(\"Final portfolio mins:\", portfolio_mins[perf_set])\n",
738774
"\n",
739775
" import matplotlib.pyplot as plt\n",
@@ -745,6 +781,8 @@
745781
" plt.legend()\n",
746782
" plt.xlabel('Portfolio building iter.')\n",
747783
" plt.ylabel('Reward or number of configs.')\n",
784+
" plt.yscale('log')\n",
785+
" plt.grid(which='both')\n",
748786
" plt.show()\n",
749787
"\n",
750788
"# print(port_perfs_mins, sum(port_perfs_mins))\n",
@@ -760,6 +798,7 @@
760798
" plt.plot(sum_over_maxes[perf_set], label=\"Sum over maxes\")\n",
761799
" plt.plot(max_over_sums, label=\"Max over sums\")\n",
762800
" plt.legend()\n",
801+
" plt.grid(which='both')\n",
763802
" plt.show()\n",
764803
" \n",
765804
" \n",
@@ -775,14 +814,64 @@
775814
"# print(perfs_all_envs)\n",
776815
"for combo in corr_combos:\n",
777816
"# print(\"Corr. on \" + str(combo[0] + ' and ' + combo[1]), corrs[combo[0] + ' and ' + combo[1]])\n",
778-
" print(\"Max corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs[combo[0] + ' and ' + combo[1]]))\n",
817+
" print(\"Max (across envs) corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs[combo[0] + ' and ' + combo[1]]))\n",
779818
" print(\"Min corr. on \" + str(combo[0] + ' and ' + combo[1]), min(corrs[combo[0] + ' and ' + combo[1]]))\n",
819+
" print(\"Max spm corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs_spm[combo[0] + ' and ' + combo[1]]))\n",
820+
" print(\"Min spm corr. on \" + str(combo[0] + ' and ' + combo[1]), min(corrs_spm[combo[0] + ' and ' + combo[1]]))\n",
821+
" \n",
780822
"# corrs[combo[0] + ' and ' + combo[1]]\n",
781823
"\n",
782824
"# for i in range(num_env_configs):\n",
783825
"# corrs[combo[0] + ' and ' + combo[1]]\n"
784826
]
785827
},
828+
{
829+
"cell_type": "code",
830+
"execution_count": null,
831+
"metadata": {},
832+
"outputs": [],
833+
"source": [
834+
"# Spearman correlation of agent configs on 100 random pairs of envs\n",
835+
"import random\n",
836+
"\n",
837+
"random.seed(0)\n",
838+
"\n",
839+
"# From https://stackoverflow.com/a/48581219/11063709\n",
840+
"n = 1000\n",
841+
"A = list(range(n))\n",
842+
"k = 2\n",
843+
"m = 100\n",
844+
"\n",
845+
"samples = set()\n",
846+
"tries = 0\n",
847+
"while len(samples) < m:\n",
848+
" samples.add(tuple(sorted(random.sample(A, k))))\n",
849+
" tries += 1\n",
850+
"\n",
851+
"samples = list(samples)\n",
852+
"# print(samples)\n",
853+
"# print(tries)\n",
854+
"\n",
855+
"corrs_spm_agents_on_envs = {}\n",
856+
"for perf_set in perf_sets:\n",
857+
" corrs_spm_agents_on_envs[perf_set] = []\n",
858+
"\n",
859+
"print(\"Spearman correlation of agent configs on 100 random pairs of envs:\")\n",
860+
"print(\"Mean, std, max, min\")\n",
861+
"for perf_set in perf_sets: \n",
862+
" for i in range(len(samples)):\n",
863+
"# print(perfs[perf_set])\n",
864+
" env_0_perfs = perfs_all_envs[perf_set][samples[i][0], :]\n",
865+
" env_1_perfs = perfs_all_envs[perf_set][samples[i][1], :]\n",
866+
" \n",
867+
" corr_spm = spm(env_0_perfs, env_1_perfs)[0]\n",
868+
" corrs_spm_agents_on_envs[perf_set].append(corr_spm)\n",
869+
" \n",
870+
"# print(corrs_spm_agents_on_envs[perf_set])\n",
871+
"\n",
872+
" print(perf_set, np.mean(corrs_spm_agents_on_envs[perf_set]), np.std(corrs_spm_agents_on_envs[perf_set]), np.max(corrs_spm_agents_on_envs[perf_set]), np.min(corrs_spm_agents_on_envs[perf_set]))"
873+
]
874+
},
786875
{
787876
"cell_type": "code",
788877
"execution_count": null,

‎run_experiments.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
'agent_config and model_config are dicts which hold the '
5252
'static configuration for the current experiment as a '
5353
'normal Python dict.')
54-
# TODO Update docs regarding how to get configs to run: i.e., Cartesian
54+
# ####TODO Update docs regarding how to get configs to run: i.e., Cartesian
5555
# product, or random, etc.
5656
parser.add_argument('-e', '--exp-name', dest='exp_name', action='store',
5757
default='mdpp_default_experiment',
@@ -73,6 +73,8 @@
7373
'for the experiment will be taken and ordered as a list '
7474
'and this number corresponds to the configuration number '
7575
'in this list. Please look in to the code for details.')
76+
# ###TODO Remove? #hack to run 1000 x 1000 env configs x agent configs.
77+
# Storing all million of them in memory may be too inefficient?
7678
parser.add_argument('-a', '--agent-config-num', dest='agent_config_num',
7779
action='store', default=None, type=int,
7880
help='Used for running the configurations of experiments '
@@ -84,7 +86,8 @@
8486
').')
8587
parser.add_argument('-m', '--save-model', dest='save_model', action='store',
8688
default=False, type=bool,
87-
help='Option to save trained NN model at the end of '
89+
help='Option to save trained NN model and framework \
90+
generated files at the end of '
8891
'training.')
8992
parser.add_argument('-t', '--framework-dir', dest='framework_dir',
9093
action='store', default='/tmp/', type=str,
@@ -122,6 +125,7 @@
122125
logging.error("Log level {} not in {}.".format(args.log_level,
123126
log_levels.keys()))
124127

128+
config_file = args.config_file
125129

126130
if args.config_file[-3:] == '.py':
127131
config_file = args.config_file[:-3]
@@ -137,7 +141,7 @@
137141

138142
print("Stats file being written to:", stats_file_name)
139143

140-
config, final_configs = config_processor.process_configs(config_file, stats_file_prefix=stats_file_name, framework=args.framework, config_num=args.config_num, log_level=log_level_)
144+
config, final_configs = config_processor.process_configs(config_file, stats_file_prefix=stats_file_name, framework=args.framework, config_num=args.config_num, log_level=log_level_, framework_dir=args.framework_dir)
141145

142146
print("Configuration number(s) that will be run:", "all" if args.config_num is None else args.config_num)
143147

@@ -154,6 +158,7 @@
154158

155159
if args.config_num is None:
156160
# final_configs = config.final_configs
161+
print("Total number of configs to run:", len(final_configs))
157162
pass
158163
else:
159164
final_configs = [final_configs[args.config_num]]
@@ -187,20 +192,23 @@
187192

188193
analysis = tune.run(
189194
algorithm,
190-
name=algorithm + str(stats_file_name.split('/')[-1]) + '_' \
191-
+ str(args.config_num), ####IMP "name" has to be specified, otherwise,
195+
name=algorithm + '_' + str(stats_file_name.split('/')[-1]) + '_' \
196+
, ####IMP "name" has to be specified, otherwise,
192197
# it may lead to clashing for temp file in ~/ray_results/... directory.
193198
stop={
194199
"timesteps_total": timesteps_total,
195200
},
196201
config=tune_config,
197202
checkpoint_at_end=args.save_model,
198-
local_dir=args.framework_dir + '/_ray_results',
203+
local_dir=args.framework_dir + '/_ray_results_' + str(args.config_num),
199204
#return_trials=True # add trials = tune.run( above
200205
)
201206

202-
pickle.dump(analysis, open("{}_analysis.pickle".format(args.exp_name),
203-
"wb"))
207+
if args.save_model:
208+
pickle.dump(analysis, open("{}_analysis.pickle".format(args.exp_name),
209+
"wb"))
210+
211+
config_processor.post_processing(framework=args.framework)
204212

205213
end = time.time()
206214
print("No. of seconds to run:", end - start)

‎run_experiments_on_cluster_nemo.sh

+8-10
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/bin/bash
22
#MOAB -N mdpp
3-
#MOAB -t 0-19 # specifies array job indices
3+
#MOAB -t 0-49 # specifies array job indices
44
#MOAB -l nodes=1:ppn=5
5-
#MOAB -l walltime=0:50:00:00
5+
#MOAB -l walltime=0:40:00:00
66
#MOAB -l pmem=8GB # Seems like it is memory per CPU core
77
#MOAB -d /work/ws/nemo/fr_rr1034-ws_mdpp-0 # initial working dir.
88

@@ -23,7 +23,7 @@ echo "TMPDIR: " $TMPDIR
2323

2424
printenv
2525

26-
export EXP_NAME='rainbow_hydra_qbert' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?
26+
export EXP_NAME='dqn_space_invaders_r_noise' # Ideally contains Area of research + algorithm + dataset # Could just pass this as job name?
2727

2828
echo -e '\033[32m'
2929
# Print some information about the job to STDOUT
@@ -48,7 +48,7 @@ echo Shell used is $SHELL
4848
# source activate /home/rajanr/anaconda2/envs/py36
4949
# source activate /home/rajanr/anaconda3/envs/py36_toy_rl
5050
. /home/fr/fr_fr/fr_rr1034/anaconda3/etc/profile.d/conda.sh # for anaconda3
51-
conda activate /home/fr/fr_fr/fr_rr1034/anaconda3/envs/old_py36_toy_rl # should be conda activate and not source when using anaconda3?
51+
conda activate /home/fr/fr_fr/fr_rr1034/anaconda3/envs/py36_toy_rl # should be conda activate and not source when using anaconda3?
5252
echo $?
5353
echo Paths: $PATH
5454
#/home/rajanr/anaconda3/bin/conda activate /home/rajanr/anaconda2/envs/py36
@@ -74,13 +74,11 @@ JOB_ID=`echo ${MOAB_JOBID} | cut -d'[' -f 1`
7474
mkdir -p mdpp_${JOB_ID}
7575
cd mdpp_${JOB_ID}
7676
# cd /home/rajanr/mdpp
77-
echo ${MOAB_JOBID} ${MOAB_JOBARRAYINDEX} ${MOAB_JOBNAME}
77+
echo "MOAB_JOBID:" ${MOAB_JOBID} "MOAB_JOBARRAYINDEX:" ${MOAB_JOBARRAYINDEX} "MOAB_JOBNAME:" ${MOAB_JOBNAME}
7878

79-
# for i in {0..0}
80-
# do
81-
echo -e "Running env config $i:\n"
82-
\time -v python3 /home/fr/fr_fr/fr_rr1034/mdp-playground/run_experiments.py --exp-name ${EXP_NAME} --config-file /home/fr/fr_fr/fr_rr1034/mdp-playground/experiments/${EXP_NAME} --config-num 0 --agent-config-num ${MOAB_JOBARRAYINDEX} --framework-dir ${TMPDIR}
83-
# done
79+
80+
\time -v python3 /home/fr/fr_fr/fr_rr1034/mdp-playground/run_experiments.py --exp-name ${EXP_NAME} --config-file /home/fr/fr_fr/fr_rr1034/mdp-playground/experiments/${EXP_NAME} --config-num ${MOAB_JOBARRAYINDEX} --framework-dir ${TMPDIR}
81+
#/work/ws/nemo/fr_rr1034-ws_mdpp-0/mdpp_10405451/ray
8482

8583

8684
#python output_argv_1.py
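With this change the MOAB array index is passed directly as `--config-num`, so each array job runs exactly one configuration. A toy sketch of that selection is below; the `final_configs` list is invented and only stands in for the one returned by `config_processor.process_configs`.

```python
# Toy sketch: one MOAB array-job index picks one configuration to run.
import os

# Invented grid standing in for the list produced by config_processor.process_configs.
final_configs = [{"lr": 10 ** -exp, "transition_noise": noise}
                 for exp in range(1, 11)
                 for noise in (0.0, 0.01, 0.02, 0.10, 0.25)]   # 50 configs

# MOAB sets MOAB_JOBARRAYINDEX for each array job (0-49 in this script).
array_index = int(os.environ.get("MOAB_JOBARRAYINDEX", "0"))

selected = final_configs[array_index]
print("Array job", array_index, "runs config:", selected)
```

In the real script the number of generated configs has to match the array range declared in the `#MOAB -t` directive.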

‎setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
# package_dir={"": "src"},
6666
packages=find_packages(),
6767
python_requires=">=3.6",
68-
install_requires=['gym'],
68+
install_requires=['gym<=0.14', 'dill'],
6969
extras_require={
7070
'extras_disc': extras_require,
7171
'extras_cont': extras_require_cont,
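The pin above restricts gym to `<=0.14` and adds `dill` as a hard dependency. As a rough sketch of the surrounding `setup()` call with those requirements; the metadata and extras lists below are placeholders, not the package's actual values.

```python
# Rough sketch of a setup() call using the pinned dependencies from this diff.
# Metadata and extras below are placeholders only.
from setuptools import find_packages, setup

setup(
    name="mdp_playground",                      # placeholder metadata
    version="0.0.0",                            # placeholder
    packages=find_packages(),
    python_requires=">=3.6",
    install_requires=["gym<=0.14", "dill"],     # pin introduced in this commit
    extras_require={
        "extras_disc": ["ray[rllib]"],          # placeholder extras lists
        "extras_cont": ["mujoco-py"],
    },
)
```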

‎tests/test_mdp_playground.py

+63
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,69 @@ def test_continuous_dynamics_target_point_sparse(self):
476476
env.close()
477477

478478

479+
def test_continuous_image_representations(self):
480+
''''''
481+
print('\033[32;1;4mTEST_CONTINUOUS_IMAGE_REPRESENTATIONS\033[0m')
482+
config = {}
483+
config["log_filename"] = log_filename
484+
config["seed"] = 0
485+
486+
config["state_space_type"] = "continuous"
487+
config["action_space_type"] = "continuous"
488+
config["state_space_dim"] = 2
489+
config["action_space_dim"] = 2
490+
config["delay"] = 0
491+
config["sequence_length"] = 1 # seq_len is always going to be 1 for move_to_a_point R. assert for this?
492+
config["transition_dynamics_order"] = 1
493+
config["inertia"] = 1.0
494+
config["time_unit"] = 1
495+
496+
config["reward_function"] = "move_to_a_point"
497+
# config["make_denser"] = False
498+
config["state_space_max"] = 5 # Will be a Box in the range [-max, max]
499+
config["target_point"] = [-0.29792, 1.71012]
500+
config["target_radius"] = 0.172 # to give reward in 3rd last step. At each step, the distance reduces by ~0.035355 to the final point of this trajectory which is also the target point by design for this test.
501+
config["reward_scale"] = 2.0
502+
503+
504+
config["image_representations"] = True
505+
config["image_width"] = 100
506+
config["image_height"] = 100
507+
env = RLToyEnv(**config)
508+
state = env.get_augmented_state()['augmented_state'][-1]
509+
# init state: [ 1.9652315 -2.4397445]
510+
expected_image_sums = [7546980, 7546980, 7546980, 7547490, 7587270]
511+
512+
# obs = env.curr_obs
513+
# import PIL.Image as Image
514+
# img1 = Image.fromarray(np.squeeze(obs), 'RGB')
515+
# img1.show()
516+
517+
for i in range(5):
518+
# action = env.action_space.sample()
519+
action = np.array([-0.45, 0.8]) # just to test if acting "in a line" works
520+
next_obs, reward, done, info = env.step(action)
521+
next_state = env.get_augmented_state()['augmented_state'][-1]
522+
print("sars', done =", state, action, reward, next_state, done)
523+
state = next_state.copy()
524+
525+
# obs = env.curr_obs
526+
# import PIL.Image as Image
527+
# img1 = Image.fromarray(np.squeeze(obs), 'RGB')
528+
# img1.show()
529+
530+
if i < len(expected_image_sums):
531+
assert next_obs.sum() == expected_image_sums[i], "Expected sum over image pixels: " + str(expected_image_sums[i]) + ". Was: " + str(next_obs.sum())
532+
533+
final_dist = np.linalg.norm(state - np.array(config["target_point"]))
534+
assert final_dist < config["target_radius"]
535+
536+
# test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)
537+
# self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error
538+
env.reset()
539+
env.close()
540+
541+
479542
def test_discrete_dynamics(self):
480543
'''Tests the P dynamics. Tests whether actions taken in terminal states lead back to the same terminal state. Tests if state in discrete environments is an int.
481544
'''
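The new test above drives a continuous environment with image observations and checks exact pixel sums. A shorter usage sketch along the same lines follows; the import path and the pre-0.26 Gym `reset()`/`step()` API are assumed from the rest of the test suite, and only the observation shape is checked rather than pixel sums.

```python
# Minimal usage sketch for a continuous RLToyEnv with image observations.
# Config keys mirror the test above; the import path and reset() return value are assumptions.
import numpy as np
from mdp_playground.envs import RLToyEnv

config = {
    "seed": 0,
    "state_space_type": "continuous",
    "action_space_type": "continuous",
    "state_space_dim": 2,
    "action_space_dim": 2,
    "transition_dynamics_order": 1,
    "inertia": 1.0,
    "time_unit": 1,
    "delay": 0,
    "sequence_length": 1,
    "reward_function": "move_to_a_point",
    "target_point": [-0.29792, 1.71012],
    "target_radius": 0.172,
    "reward_scale": 2.0,
    "state_space_max": 5,            # Box observations in [-max, max] before rendering
    "image_representations": True,
    "image_width": 100,
    "image_height": 100,
}

env = RLToyEnv(**config)
obs = env.reset()                    # pre-0.26 Gym API: reset() returns the initial observation
print("Observation shape:", np.asarray(obs).shape)   # expected e.g. (100, 100, 1)

next_obs, reward, done, info = env.step(np.array([-0.45, 0.8]))
print("Reward after one step:", reward)
env.close()
```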
