automl
diff --git a/default_config.py b/default_config.py
-5
diff --git a/experiments/rainbow_hydra.py b/experiments/rainbow_hydra.py
+15-129
diff --git a/experiments/rainbow_hydra_qbert.py b/experiments/rainbow_hydra_qbert.py
+11-22
@@ -76,8 +76,3 @@
         "lstm_use_prev_action_reward": False,
     },
 }
-
-varying_configs = get_grid_of_configs(var_configs)
-# print("VARYING_CONFIGS:", varying_configs)
-
-final_configs = combined_processing(env_config, agent_config, model_config, eval_config, varying_configs=varying_configs, framework='ray', algorithm='SAC')
@@ -1,12 +1,10 @@
 num_seeds = 1
 timesteps_total = 20_000
-num_agent_configs = 1000
-num_prob_inst = 1000
+num_configs = 1000
 
-import numpy as np
 from collections import OrderedDict
 
-var_env_configs = OrderedDict({
+sobol_env_configs = OrderedDict({
     'action_space_size': (8,),#, 10, 12, 14] # [2**i for i in range(1,6)]
     # 'action_space_size': (64),#2, 4, 8, 16] # [2**i for i in range(1,6)]
     'delay': "cat, " + str([i for i in range(11)]), # + [2**i for i in range(4)],
@@ -17,84 +15,13 @@
     'terminal_state_density': (0.25,), # np.linspace(0.1, 1.0, num=5)
     'reward_dist': "float, [0.01, 0.8]",
     'reward_scale': "float, log, [0.1, 100]",
-    'dummy_seed': (0,), #"cat, " + str([i for i in range(num_seeds)]),
+    'dummy_seed': (0,), # "cat, " + str([i for i in range(num_seeds)]), #seed
 })
 
 
-print(var_env_configs)
-cartesian_product_configs = []
-def sobol_configs_from_config_dict(config_dict):
-    '''
-    '''
+print(sobol_env_configs)
 
-    num_dims = 0
-    for key in config_dict:
-        val = config_dict[key]
-        if type(val) == tuple: # i.e. a constant value
-            pass
-        else: # i.e. a variable value
-            num_dims += 1
-
-    print("Generating sobol sequence with " + str(num_prob_inst) + " and " + str(num_dims) + " dimensions:")
-
-    from scipy.optimize._shgo_lib.sobol_seq import Sobol # Only generates real vectors in range 0 to 1 per dimension
-    import json
-    sobol_gen = Sobol()
-    sobol = sobol_gen.i4_sobol_generate(num_dims, num_prob_inst, skip=0)
-    print(sobol)
-
-    for sample in sobol:
-        # print(sample)
-        cartesian_product_configs.append({}) # new config
-        j = 0
-        for key in config_dict:
-            val = config_dict[key]
-            if type(val) == tuple: # i.e. a constant value
-                cartesian_product_configs[-1][key] = val[0]
-            # The rest are config spaces for param settings
-            elif "int" in val:
-                lower = float(val.split("[")[1].split(",")[0].strip())
-                upper = float(val.split("]")[0].split(",")[-1].strip())
-                log = True if "log" in val else False
-                #TODO log vals
-                sobol_val = lower + (upper - lower) * sample[j]
-                cartesian_product_configs[-1][key] = int(sobol_val)
-                j += 1
-            elif "float" in val:
-                lower = float(val.split("[")[1].split(",")[0].strip())
-                upper = float(val.split("]")[0].split(",")[-1].strip())
-                log = True if "log" in val else False
-                if log:
-                    lower = np.log(lower)
-                    upper = np.log(upper)
-                sobol_val = lower + (upper - lower) * sample[j]
-                if log:
-                    sobol_val = np.exp(sobol_val)
-                if key == "reward_dist":
-                    sobol_val = [sobol_val, 1.0]
-                cartesian_product_configs[-1][key] = sobol_val
-                j += 1
-            elif "cat" in val:
-                choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
-                len_c = len(choices)
-                if sample[j] == 1.0: #TODO remove? Don't know if sobol samples include 1.0
-                    sample[j] -= 1e-10
-                index = int(sample[j] * len_c)
-                cartesian_product_configs[-1][key] = choices[index]
-                j += 1
-
-
-
-sobol_configs_from_config_dict(var_env_configs)
-# import pprint
-# pp = pprint.PrettyPrinter(indent=4)
-
-for i, conf in enumerate(cartesian_product_configs):
-    cartesian_product_configs[i] = tuple(conf.values()) #hack
-    # print(conf)
-    # pp.pprint(cartesian_product_configs[i])
-
-var_agent_configs = OrderedDict({
+random_agent_configs = OrderedDict({
 
     "lr": "float, log, [1e-5, 1e-3]", # 1e-4
     "learning_starts": "int, [1, 2000]", # 500
@@ -107,53 +34,22 @@ def sobol_configs_from_config_dict(config_dict):
 
 })
 
-var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above.
+random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.
 
-def create_config_space_from_config_dict(config_dict):
-    '''
-    '''
-    import ConfigSpace as CS
-    cs = CS.ConfigurationSpace(seed=1234)
-    import ConfigSpace.hyperparameters as CSH
-    import json
 
-    for key in config_dict:
-        val = config_dict[key]
-        if "int" in val:
-            lower = int(val.split("[")[1].split(",")[0].strip())
-            upper = int(val.split("]")[0].split(",")[-1].strip())
-            log = True if "log" in val else False
-            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter(name=key, lower=lower, upper=upper, log=log))
-        elif "float" in val:
-            lower = float(val.split("[")[1].split(",")[0].strip())
-            upper = float(val.split("]")[0].split(",")[-1].strip())
-            log = True if "log" in val else False
-            cs.add_hyperparameter(CSH.UniformFloatHyperparameter(name=key, lower=lower, upper=upper, log=log))
-        elif "cat" in val:
-            choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
-            cs.add_hyperparameter(CSH.CategoricalHyperparameter(name=key, choices=choices))
-            # print(type(CSH.CategoricalHyperparameter(name=key, choices=choices).choices[0]))
+random_configs = OrderedDict({
+"env": {},
+"agent": random_agent_configs,
 
-    return cs
+})
 
-cs = create_config_space_from_config_dict(var_agent_configs)
-print("Agent variable ConfigSpace:")
-print(cs)
-random_configs = cs.sample_configuration(size=num_agent_configs)
-# print("type(random_configs):", type(random_configs))
-for i in range(len(random_configs)):
-    # if random_configs[i].get_dictionary()["train_batch_size"] == 4 \
-    # and random_configs[i].get_dictionary()["buffer_size"] < 33:
-    #     print("Config:", i, "train_batch_size, buffer_size:", random_configs[i].get_dictionary()["train_batch_size"], random_configs[i].get_dictionary()["buffer_size"])
-    random_configs[i] = tuple(random_configs[i].get_dictionary().values()) #hack ####TODO Change run_experiments.py and here to directly pass whole config dict to run_experiments.py. Would need to replace in every config.py file.
-# print(random_configs)
 
-var_configs = OrderedDict({
-"env": var_env_configs,
-"agent": var_agent_configs,
+sobol_configs = OrderedDict({
+"env": sobol_env_configs,
 
 })
 
+
 env_config = {
     "env": "RLToy-v0",
     "horizon": 100,
@@ -163,7 +59,7 @@ def create_config_space_from_config_dict(config_dict):
         'action_space_type': 'discrete',
         'generate_random_mdp': True,
         'repeats_in_sequences': False,
-        'reward_scale': 1.0,
+        # 'reward_scale': 1.0,
         'completely_connected': True,
     },
 }
@@ -226,7 +122,7 @@ def create_config_space_from_config_dict(config_dict):
         "custom_preprocessor": "ohe",
         "custom_options": {},  # extra options to pass to your preprocessor
         "fcnet_activation": "tanh",
-        "use_lstm": False,
+        # "use_lstm": False,
         "max_seq_len": 20,
         "lstm_cell_size": 256,
         "lstm_use_prev_action_reward": False,
@@ -250,13 +146,3 @@ def create_config_space_from_config_dict(config_dict):
         }
     },
 }
-
-# value_tuples = []
-# for config_type, config_dict in var_configs.items():
-#     for key in config_dict:
-#         assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
-#         value_tuples.append(var_configs[config_type][key])
-#
-# import itertools
-# cartesian_product_configs = list(itertools.product(*value_tuples))
-# print("Total number of configs. to run:", len(cartesian_product_configs))
@@ -2,27 +2,18 @@
 timesteps_total = 10_000_000
 from collections import OrderedDict
 
-var_env_configs = OrderedDict({
+sobol_env_configs = OrderedDict({
     'delay': [0],
     'dummy_seed': [i for i in range(num_seeds)],
 })
 
-var_configs = OrderedDict({
-"env": var_env_configs
-})
-
-value_tuples = []
-for config_type, config_dict in var_configs.items():
-    for key in config_dict:
-        assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
-        value_tuples.append(var_configs[config_type][key])
+sobol_configs = OrderedDict({
+"env": sobol_env_configs
 
-import itertools
-cartesian_product_configs = list(itertools.product(*value_tuples))
-print("Total number of grid configs. to run:", len(cartesian_product_configs))
+})
 
 
-var_agent_configs = OrderedDict({
+random_agent_configs = OrderedDict({
 
     "lr": "float, log, [1e-5, 1e-3]", # 1e-4
     "learning_starts": "int, [1, 2000]", # 500
@@ -35,7 +26,7 @@
 
 })
 
-var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because saved configs used below as random_configs are ordered alphabetically.
+random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.
 
 random_configs = \
 [(1.86e-12, 1480, 0.0697, 311, 0.000545, 8, 1845, 64), # top 10 configs begin from here
@@ -59,14 +50,12 @@
  (0.0133, 6541, 0.218, 1393, 1.21e-05, 1, 3, 16),
  (0.0515, 507, 0.48100000000000004, 1866, 1.23e-05, 3, 136, 128)]
 
-for i in range(len(random_configs)):
-    random_configs[i] = tuple(random_configs[i]) ##IMP I think these are tuples because cartesian_product_configs by default has tuples.
 
-var_configs = OrderedDict({
-"env": var_env_configs,
-"agent": var_agent_configs,
-
-})
+# var_configs = OrderedDict({
+# "env": var_env_configs,
+# "agent": var_agent_configs,
+#
+# })
 
 env_config = {
     "env": "GymEnvWrapper-Atari",
Original file line number	Diff line number	Diff line change
`@@ -76,8 +76,3 @@`
`76`	`76`	`"lstm_use_prev_action_reward": False,`
`77`	`77`	`},`
`78`	`78`	`}`
`79`		`-`
`80`		`-varying_configs = get_grid_of_configs(var_configs)`
`81`		`-# print("VARYING_CONFIGS:", varying_configs)`
`82`		`-`
`83`		`-final_configs = combined_processing(env_config, agent_config, model_config, eval_config, varying_configs=varying_configs, framework='ray', algorithm='SAC')`