
Commit 9c018e8

Merge branch 'main' into deephyper
2 parents 20cb057 + e105db0

15 files changed: +189 -18 lines
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/sh
+#SBATCH --partition=general --qos=long
+#SBATCH --time=168:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=36000
+#SBATCH --job-name=lcdbL
+#SBATCH --output=lcdbL%a.txt
+#SBATCH --error=lcdbL%a.txt
+#SBATCH --array=1-83
+ulimit -n 8000
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
+rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
+srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_large.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/sh
+#SBATCH --partition=general --qos=long
+#SBATCH --time=168:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=12000
+#SBATCH --job-name=lcdbM
+#SBATCH --output=lcdbM%a.txt
+#SBATCH --error=lcdbM%a.txt
+#SBATCH --array=1-146
+ulimit -n 8000
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
+rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
+srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_medium.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/sh
+#SBATCH --partition=general --qos=long
+#SBATCH --time=168:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=6000
+#SBATCH --job-name=lcdbS
+#SBATCH --output=lcdbS%a.txt
+#SBATCH --error=lcdbS%a.txt
+#SBATCH --array=1-115
+ulimit -n 8000
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
+rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
+cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
+srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_small.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
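
Note: all three array scripts stage the shared OpenML cache to node-local /tmp with rsync and then point OpenML at that copy by writing cachedir=... to ~/.config/openml/config inside the container. As a hedged illustration (not part of this commit), openml-python also exposes the cache location programmatically as openml.config.cache_directory; the paths below simply mirror the ones in the scripts:

import os
import openml

# Illustrative sketch: mirrors what the job scripts write into
# ~/.config/openml/config inside the Apptainer container.
cache_dir = "/tmp/tjviering/openml_cache/"
os.makedirs(cache_dir, exist_ok=True)
openml.config.cache_directory = cache_dir

# Each array task works in its own scratch directory, keyed by task id.
task_id = os.environ.get("SLURM_ARRAY_TASK_ID", "0")
workdir = os.path.join("/tmp/tjviering", task_id)
os.makedirs(workdir, exist_ok=True)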
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+import json
+import pymysql
+import pandas as pd
+import time
+
+pw2 = 'database_password'
+
+def postprocess_table(table_name):
+
+    cnx = pymysql.connect(host='lcdb_experiments.ewi.tudelft.nl', user='lcdb', passwd=pw2, db='db_lcdb')
+    query = '''select * from %s where postprocess=1;''' % table_name
+    to_process = pd.read_sql_query(query, cnx)
+
+    print('found %d rows for processing...' % len(to_process))
+
+    query_list = []
+
+    for i in range(0, len(to_process)):
+        print('working on row %d' % i)
+
+        row = to_process.iloc[i]
+
+        query = '''select * from %s where workflow='%s' and openmlid=%d and hyperparameters='%s' and status='created';''' % (
+            table_name, row.workflow, row.openmlid, row.hyperparameters)
+
+        datas = pd.read_sql_query(query, cnx)
+        if len(datas) < 1:
+            print('this row doesnt have any jobs remaining... too bad!')
+        else:
+            trainsize_small = json.loads(row.train_sizes)[0]
+
+            trainsizes_todo = []
+            for train_size in datas['train_sizes'].unique():
+                train_size_ = json.loads(train_size)
+                if train_size_[0] > trainsize_small:
+                    trainsizes_todo.append(train_size)
+
+            for trainsize in trainsizes_todo:
+                query_list.append(
+                    '''update %s set status='skipped' where workflow='%s' and openmlid=%d and hyperparameters='%s' and status='created' and train_sizes='%s';''' % (
+                        table_name, row.workflow, row.openmlid, row.hyperparameters, trainsize))
+
+        query_list.append('''update %s set postprocess=0 where id=%d''' % (table_name, row.ID))
+
+    print('I have to execute %d queries... Lets get to work!' % len(query_list))
+
+    affected_rows = []
+    if len(query_list) > 0:
+        cursor = cnx.cursor()
+        for query in query_list:
+            print('performing query: %s' % query)
+            tmp = (cursor.execute(query))
+            print('rows affected: %d' % tmp)
+            affected_rows.append(tmp)
+        cursor.close()
+        cnx.commit()
+    cnx.close()
+
+
+while True:
+    try:
+        print('trying small...')
+        postprocess_table('jobs_small')
+        print('trying medium...')
+        postprocess_table('jobs_medium')
+        print('trying large...')
+        postprocess_table('jobs_large')
+    except Exception as e:
+        print('failed with error %s' % str(e))
+    print('going to sleep for 5 min...')
+    time.sleep(60*5)
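
Note: the script above builds its SQL by string interpolation, so a hyperparameters value containing a single quote would break the statement. A minimal sketch (not part of the commit) of the same skip-update using pymysql's value binding; only the table name still has to be interpolated, since placeholders cover values, not identifiers, and the names are taken from the script above:

# Sketch: the 'skipped' update with bound values instead of interpolation.
def skip_larger_jobs(cnx, table_name, row, trainsize):
    sql = ("update " + table_name + " set status='skipped' "
           "where workflow=%s and openmlid=%s and hyperparameters=%s "
           "and status='created' and train_sizes=%s")
    with cnx.cursor() as cursor:
        # cursor.execute returns the number of affected rows
        return cursor.execute(sql, (row.workflow, int(row.openmlid),
                                    row.hyperparameters, trainsize))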

publications/2023-neurips/config/knn_large.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_large
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false
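
Note: the same two-line change recurs in all nine config files in this commit: n_jobs = 2 lines up with the --mincpus=2 requested by the job scripts, and the new postprocess:boolean resultfield is the flag that the postprocessing script polls. A quick standard-library check that a config carries both changes, assuming the cfg parses with Python's stock configparser and uses the PY_EXPERIMENTER section that lcdb/workflow/_util.py reads:

import configparser

# Hypothetical sanity check; the path is one of the files in this commit.
config = configparser.ConfigParser()
config.read("publications/2023-neurips/config/knn_large.cfg")
section = config["PY_EXPERIMENTER"]
assert section.getint("n_jobs") == 2
assert "postprocess:boolean" in section["resultfields"]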

publications/2023-neurips/config/knn_medium.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_medium
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/knn_small.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_small
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/liblinear_large.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_large
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/liblinear_medium.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_medium
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/liblinear_small.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_small
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/libsvm_large.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_large
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/libsvm_medium.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_medium
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/config/libsvm_small.cfg

Lines changed: 3 additions & 1 deletion
@@ -4,6 +4,8 @@ provider = mysql
 database = db_lcdb
 table = jobs_small
 
+n_jobs = 2
+
 # train_size and hyperparameters are omitted since they are computed automatically
 keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
 workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
 maxruntime = 1800
 measure_memory = 0
 
-resultfields = result:LONGTEXT
+resultfields = result:LONGTEXT, postprocess:boolean
 resultfields.timestamps = false

publications/2023-neurips/lcdb/cli/_create.py

Lines changed: 0 additions & 1 deletion
@@ -88,6 +88,5 @@ def main(
     pd.DataFrame(configs, columns=skopt_space.dimension_names).to_csv(
         output_file, index=False
     )
-
     if verbose:
         print(f"Experiments written to {output_file}")

publications/2023-neurips/lcdb/workflow/_util.py

Lines changed: 49 additions & 8 deletions
@@ -245,6 +245,7 @@ def get_all_experiments(
     seed: int,
     max_num_anchors_per_row: int,
     LHS: bool,
+    random_hps_per_dataset: bool,
 ) -> List[Dict]:
     """Create a sample of experimental configurations for a given workflow.
 
@@ -264,14 +265,54 @@
         max_num_anchors_per_row=max_num_anchors_per_row,
     )
 
-    # import the workflow class
-    workflow_path = config.get("PY_EXPERIMENTER", "workflow")
-    workflow_class = import_attr_from_module(workflow_path)
-
-    config_space = workflow_class.get_config_space()
-    default_config = get_default_config(config_space)
-
-    config_space.seed(seed)
+    df_experiments_grouped = df_experiments.groupby("openmlid")
+
+    experiments = []
+
+    for name, group in df_experiments_grouped:
+        print('working on dataset %d...' % name)
+        # import the workflow class
+        workflow_path = config.get("PY_EXPERIMENTER", "workflow")
+        workflow_class = import_attr_from_module(workflow_path)
+
+        config_space = workflow_class.get_config_space()
+        default_config = get_default_config(config_space)
+
+        seed_post_processed = seed
+        if random_hps_per_dataset:
+            seed_post_processed = seed_post_processed + int(name)
+        config_space.seed(seed_post_processed)
+
+        if LHS:
+            print('using LHS with seed %d...' % seed_post_processed)
+            lhs_generator = LHSGenerator(config_space, n=num_configs, seed=seed)
+            hp_samples = lhs_generator.generate()
+        else:
+            print('using random sampling with seed %d...' % seed_post_processed)
+            hp_samples = config_space.sample_configuration(num_configs)
+            if num_configs == 1:
+                hp_samples = [hp_samples]
+        hp_samples.insert(0, default_config)
+
+        # create all rows for the experiments
+        experiments = experiments + [
+            {
+                "workflow": workflow_path,
+                "openmlid": openmlid,
+                "valid_prop": v_p,
+                "test_prop": t_p,
+                "seed_outer": s_o,
+                "seed_inner": s_i,
+                "train_sizes": train_sizes,
+                "maxruntime": maxruntime,
+                "hyperparameters": dict(hp),
+                "monotonic": mon,
+                "measure_memory": measure_memory,
+            }
+            for (openmlid, v_p, t_p, s_o, s_i, train_sizes, mon, maxruntime, measure_memory), hp in it.product(
+                group.values, hp_samples
+            )
+        ]
 
     if LHS:
         print("using LHS...")
