Skip to content

Commit 8144206

Browse files
MAJOR: Added random and sobol generation for configs; updated requirements in setup.py; undid Ray 1.3.0 compat. changes
1 parent 2a56a1f commit 8144206

8 files changed

+523
-257
lines changed

default_config.py

-5
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,3 @@
7676
"lstm_use_prev_action_reward": False,
7777
},
7878
}
79-
80-
varying_configs = get_grid_of_configs(var_configs)
81-
# print("VARYING_CONFIGS:", varying_configs)
82-
83-
final_configs = combined_processing(env_config, agent_config, model_config, eval_config, varying_configs=varying_configs, framework='ray', algorithm='SAC')

experiments/rainbow_hydra.py

+15-129
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
num_seeds = 1
22
timesteps_total = 20_000
3-
num_agent_configs = 1000
4-
num_prob_inst = 1000
3+
num_configs = 1000
54

6-
import numpy as np
75
from collections import OrderedDict
86

9-
var_env_configs = OrderedDict({
7+
sobol_env_configs = OrderedDict({
108
'action_space_size': (8,),#, 10, 12, 14] # [2**i for i in range(1,6)]
119
# 'action_space_size': (64),#2, 4, 8, 16] # [2**i for i in range(1,6)]
1210
'delay': "cat, " + str([i for i in range(11)]), # + [2**i for i in range(4)],
@@ -17,84 +15,13 @@
1715
'terminal_state_density': (0.25,), # np.linspace(0.1, 1.0, num=5)
1816
'reward_dist': "float, [0.01, 0.8]",
1917
'reward_scale': "float, log, [0.1, 100]",
20-
'dummy_seed': (0,), #"cat, " + str([i for i in range(num_seeds)]),
18+
'dummy_seed': (0,), # "cat, " + str([i for i in range(num_seeds)]), #seed
2119
})
2220

2321

24-
print(var_env_configs)
25-
cartesian_product_configs = []
26-
def sobol_configs_from_config_dict(config_dict):
27-
'''
28-
'''
22+
print(sobol_env_configs)
2923

30-
num_dims = 0
31-
for key in config_dict:
32-
val = config_dict[key]
33-
if type(val) == tuple: # i.e. a constant value
34-
pass
35-
else: # i.e. a variable value
36-
num_dims += 1
37-
38-
print("Generating sobol sequence with " + str(num_prob_inst) + " and " + str(num_dims) + " dimensions:")
39-
40-
from scipy.optimize._shgo_lib.sobol_seq import Sobol # Only generates real vectors in range 0 to 1 per dimension
41-
import json
42-
sobol_gen = Sobol()
43-
sobol = sobol_gen.i4_sobol_generate(num_dims, num_prob_inst, skip=0)
44-
print(sobol)
45-
46-
for sample in sobol:
47-
# print(sample)
48-
cartesian_product_configs.append({}) # new config
49-
j = 0
50-
for key in config_dict:
51-
val = config_dict[key]
52-
if type(val) == tuple: # i.e. a constant value
53-
cartesian_product_configs[-1][key] = val[0]
54-
# The rest are config spaces for param settings
55-
elif "int" in val:
56-
lower = float(val.split("[")[1].split(",")[0].strip())
57-
upper = float(val.split("]")[0].split(",")[-1].strip())
58-
log = True if "log" in val else False
59-
#TODO log vals
60-
sobol_val = lower + (upper - lower) * sample[j]
61-
cartesian_product_configs[-1][key] = int(sobol_val)
62-
j += 1
63-
elif "float" in val:
64-
lower = float(val.split("[")[1].split(",")[0].strip())
65-
upper = float(val.split("]")[0].split(",")[-1].strip())
66-
log = True if "log" in val else False
67-
if log:
68-
lower = np.log(lower)
69-
upper = np.log(upper)
70-
sobol_val = lower + (upper - lower) * sample[j]
71-
if log:
72-
sobol_val = np.exp(sobol_val)
73-
if key == "reward_dist":
74-
sobol_val = [sobol_val, 1.0]
75-
cartesian_product_configs[-1][key] = sobol_val
76-
j += 1
77-
elif "cat" in val:
78-
choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
79-
len_c = len(choices)
80-
if sample[j] == 1.0: #TODO remove? Don't know if sobol samples include 1.0
81-
sample[j] -= 1e-10
82-
index = int(sample[j] * len_c)
83-
cartesian_product_configs[-1][key] = choices[index]
84-
j += 1
85-
86-
87-
88-
sobol_configs_from_config_dict(var_env_configs)
89-
# import pprint
90-
# pp = pprint.PrettyPrinter(indent=4)
91-
92-
for i, conf in enumerate(cartesian_product_configs):
93-
cartesian_product_configs[i] = tuple(conf.values()) #hack
94-
# print(conf)
95-
# pp.pprint(cartesian_product_configs[i])
96-
97-
var_agent_configs = OrderedDict({
24+
random_agent_configs = OrderedDict({
9825

9926
"lr": "float, log, [1e-5, 1e-3]", # 1e-4
10027
"learning_starts": "int, [1, 2000]", # 500
@@ -107,53 +34,22 @@ def sobol_configs_from_config_dict(config_dict):
10734

10835
})
10936

110-
var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above.
37+
random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.
11138

112-
def create_config_space_from_config_dict(config_dict):
113-
'''
114-
'''
115-
import ConfigSpace as CS
116-
cs = CS.ConfigurationSpace(seed=1234)
117-
import ConfigSpace.hyperparameters as CSH
118-
import json
11939

120-
for key in config_dict:
121-
val = config_dict[key]
122-
if "int" in val:
123-
lower = int(val.split("[")[1].split(",")[0].strip())
124-
upper = int(val.split("]")[0].split(",")[-1].strip())
125-
log = True if "log" in val else False
126-
cs.add_hyperparameter(CSH.UniformIntegerHyperparameter(name=key, lower=lower, upper=upper, log=log))
127-
elif "float" in val:
128-
lower = float(val.split("[")[1].split(",")[0].strip())
129-
upper = float(val.split("]")[0].split(",")[-1].strip())
130-
log = True if "log" in val else False
131-
cs.add_hyperparameter(CSH.UniformFloatHyperparameter(name=key, lower=lower, upper=upper, log=log))
132-
elif "cat" in val:
133-
choices = json.loads("[" + val.split("[")[1].split("]")[0] + "]") # Seems faster than ast.literal_eval (See https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list)
134-
cs.add_hyperparameter(CSH.CategoricalHyperparameter(name=key, choices=choices))
135-
# print(type(CSH.CategoricalHyperparameter(name=key, choices=choices).choices[0]))
40+
random_configs = OrderedDict({
41+
"env": {},
42+
"agent": random_agent_configs,
13643

137-
return cs
44+
})
13845

139-
cs = create_config_space_from_config_dict(var_agent_configs)
140-
print("Agent variable ConfigSpace:")
141-
print(cs)
142-
random_configs = cs.sample_configuration(size=num_agent_configs)
143-
# print("type(random_configs):", type(random_configs))
144-
for i in range(len(random_configs)):
145-
# if random_configs[i].get_dictionary()["train_batch_size"] == 4 \
146-
# and random_configs[i].get_dictionary()["buffer_size"] < 33:
147-
# print("Config:", i, "train_batch_size, buffer_size:", random_configs[i].get_dictionary()["train_batch_size"], random_configs[i].get_dictionary()["buffer_size"])
148-
random_configs[i] = tuple(random_configs[i].get_dictionary().values()) #hack ####TODO Change run_experiments.py and here to directly pass whole config dict to run_experiments.py. Would need to replace in every config.py file.
149-
# print(random_configs)
15046

151-
var_configs = OrderedDict({
152-
"env": var_env_configs,
153-
"agent": var_agent_configs,
47+
sobol_configs = OrderedDict({
48+
"env": sobol_env_configs,
15449

15550
})
15651

52+
15753
env_config = {
15854
"env": "RLToy-v0",
15955
"horizon": 100,
@@ -163,7 +59,7 @@ def create_config_space_from_config_dict(config_dict):
16359
'action_space_type': 'discrete',
16460
'generate_random_mdp': True,
16561
'repeats_in_sequences': False,
166-
'reward_scale': 1.0,
62+
# 'reward_scale': 1.0,
16763
'completely_connected': True,
16864
},
16965
}
@@ -226,7 +122,7 @@ def create_config_space_from_config_dict(config_dict):
226122
"custom_preprocessor": "ohe",
227123
"custom_options": {}, # extra options to pass to your preprocessor
228124
"fcnet_activation": "tanh",
229-
"use_lstm": False,
125+
# "use_lstm": False,
230126
"max_seq_len": 20,
231127
"lstm_cell_size": 256,
232128
"lstm_use_prev_action_reward": False,
@@ -250,13 +146,3 @@ def create_config_space_from_config_dict(config_dict):
250146
}
251147
},
252148
}
253-
254-
# value_tuples = []
255-
# for config_type, config_dict in var_configs.items():
256-
# for key in config_dict:
257-
# assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
258-
# value_tuples.append(var_configs[config_type][key])
259-
#
260-
# import itertools
261-
# cartesian_product_configs = list(itertools.product(*value_tuples))
262-
# print("Total number of configs. to run:", len(cartesian_product_configs))

experiments/rainbow_hydra_qbert.py

+11-22
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,18 @@
22
timesteps_total = 10_000_000
33
from collections import OrderedDict
44

5-
var_env_configs = OrderedDict({
5+
sobol_env_configs = OrderedDict({
66
'delay': [0],
77
'dummy_seed': [i for i in range(num_seeds)],
88
})
99

10-
var_configs = OrderedDict({
11-
"env": var_env_configs
12-
})
13-
14-
value_tuples = []
15-
for config_type, config_dict in var_configs.items():
16-
for key in config_dict:
17-
assert type(var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
18-
value_tuples.append(var_configs[config_type][key])
10+
sobol_configs = OrderedDict({
11+
"env": sobol_env_configs
1912

20-
import itertools
21-
cartesian_product_configs = list(itertools.product(*value_tuples))
22-
print("Total number of grid configs. to run:", len(cartesian_product_configs))
13+
})
2314

2415

25-
var_agent_configs = OrderedDict({
16+
random_agent_configs = OrderedDict({
2617

2718
"lr": "float, log, [1e-5, 1e-3]", # 1e-4
2819
"learning_starts": "int, [1, 2000]", # 500
@@ -35,7 +26,7 @@
3526

3627
})
3728

38-
var_agent_configs = OrderedDict(sorted(var_agent_configs.items(), key=lambda t: t[0])) #hack because saved configs used below as random_configs are ordered alphabetically.
29+
random_agent_configs = OrderedDict(sorted(random_agent_configs.items(), key=lambda t: t[0])) #hack because ConfigSpace below orders alphabetically, the returned configs are in a jumbled order compared to the order above, which would create problems with config processing.
3930

4031
random_configs = \
4132
[(1.86e-12, 1480, 0.0697, 311, 0.000545, 8, 1845, 64), # top 10 configs begin from here
@@ -59,14 +50,12 @@
5950
(0.0133, 6541, 0.218, 1393, 1.21e-05, 1, 3, 16),
6051
(0.0515, 507, 0.48100000000000004, 1866, 1.23e-05, 3, 136, 128)]
6152

62-
for i in range(len(random_configs)):
63-
random_configs[i] = tuple(random_configs[i]) ##IMP I think these are tuples because cartesian_product_configs by default has tuples.
6453

65-
var_configs = OrderedDict({
66-
"env": var_env_configs,
67-
"agent": var_agent_configs,
68-
69-
})
54+
# var_configs = OrderedDict({
55+
# "env": var_env_configs,
56+
# "agent": var_agent_configs,
57+
#
58+
# })
7059

7160
env_config = {
7261
"env": "GymEnvWrapper-Atari",

0 commit comments

Comments
 (0)