[refactor] Format the repo

nabenabe0928 · nabenabe0928 · commit 354efa4c80a6 · 2022-10-06T04:54:25.000+09:00
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@ incumbents/
 **/.pytest_cache
 .coverage
 figs/
+archive/
 
 build/
 dist/
diff --git a/LICENSE b/LICENSE
@@ -175,7 +175,7 @@
 
    END OF TERMS AND CONDITIONS
 
-   Copyright 2022 AutoML Freiburg and contributors
+   Copyright 2022 Anonymous authors
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/plan.md b/plan.md
diff --git a/run_experiment.sh b/run_experiment.sh
@@ -12,12 +12,12 @@ run_tpe () {
 
     for quantile in 0.10 0.15
     do
-        cmd="${prefix} --warmstart False --metalearn False --quantile ${quantile}"
-        echo $cmd
-        $cmd
-        echo `date '+%y/%m/%d %H:%M:%S'`
+        # cmd="${prefix} --warmstart False --metalearn False --quantile ${quantile}"
+        # echo $cmd
+        # $cmd
+        # echo `date '+%y/%m/%d %H:%M:%S'`
 
-        for warmstart in True False
+        for warmstart in False # True
         do
             cmd="${prefix} --warmstart ${warmstart} --metalearn True --uniform_transform True --quantile ${quantile}"
             echo $cmd
@@ -39,7 +39,7 @@ run_bench () {
     seed=${1}
     bench_name=${2}
     dataset_name=${3}
-    for opt_name in tpe only-warmstart rgpe-parego rgpe-ehvi tstr-parego tstr-ehvi
+    for opt_name in tpe # only-warmstart rgpe-parego rgpe-ehvi tstr-parego tstr-ehvi
     do
         prefix="python run.py --exp_id ${seed} --opt_name ${opt_name} --bench_name ${bench_name} --dataset_name ${dataset_name}"
         if [[ "$opt_name" == "tpe" ]]
diff --git a/viz/run_synthetic.py b/viz/run_synthetic.py
diff --git a/viz/viz_dataset_dist.py b/viz/viz_dataset_dist.py
@@ -0,0 +1,78 @@
+from typing import Dict
+import json
+import pickle
+
+from fast_pareto import nondominated_rank
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+from targets.hpolib.api import DatasetChoices as HPOlibChoices
+from targets.nmt_bench.api import DatasetChoices as NMTChoices
+
+
+# Global matplotlib style shared by every figure drawn by this script.
+plt.rcParams.update(
+    {
+        "font.family": "Times New Roman",
+        "font.size": 18,
+        "mathtext.fontset": "stix",  # font set used for math text
+    }
+)
+
+
+def get_nd_rank_for_hpolib(percentile: int) -> Dict[str, np.ndarray]:
+    """Compute the non-dominated rank of every configuration of each HPOlib dataset.
+
+    Args:
+        percentile: Not used by this function; kept so existing callers keep working.
+
+    Returns:
+        A mapping from dataset name to the result of ``nondominated_rank`` applied to
+        the (valid_mse, runtime) pairs loaded from that dataset's pickle file.
+    """
+    nd_rank = {}
+    for dataset in HPOlibChoices:
+        print(dataset.name)
+        # NOTE: pickle executes arbitrary code on load; only point this at trusted files.
+        # The context manager closes the file handle, which the bare open() call leaked.
+        with open(f"targets/hpolib/metric_vals/{dataset.name}.pkl", "rb") as f:
+            costs = pickle.load(f)
+        # One row per configuration, one column per objective.
+        data = np.asarray([costs["valid_mse"], costs["runtime"]]).T
+        nd_rank[dataset.name] = nondominated_rank(costs=data)
+
+    return nd_rank
+
+
+def get_nd_rank_for_nmt(percentile: int) -> Dict[str, np.ndarray]:
+    """Compute the non-dominated rank of every configuration of each NMT dataset.
+
+    Args:
+        percentile: Not used by this function; kept so existing callers keep working.
+
+    Returns:
+        A mapping from dataset name to the result of ``nondominated_rank`` applied to
+        the (bleu, decoding_time) pairs loaded from that dataset's JSON file.
+    """
+    nd_rank = {}
+    for dataset in NMTChoices:
+        # The context manager closes the file handle, which the bare open() call leaked.
+        with open(f"nmt-bench/{dataset.value}") as f:
+            costs = json.load(f)
+        # One row per configuration, one column per objective.
+        data = np.asarray([costs["bleu"], costs["decoding_time"]]).T
+        # Objective 0 (bleu) is passed as a larger-is-better objective.
+        nd_rank[dataset.name] = nondominated_rank(costs=data, larger_is_better_objectives=[0])
+
+    return nd_rank
+
+
+def plot_cum(ax: plt.Axes, nd_rank: Dict[str, np.ndarray], percentile: int, set_ylabel: bool) -> None:
+    """Plot, per dataset, the cumulative count of top-``percentile``% configurations.
+
+    For every dataset the configurations with the smallest rank values (the first
+    ``int(n_configs * percentile / 100)`` entries of an ascending argsort) are marked,
+    and the running total of marked configurations is drawn against the config index.
+
+    Args:
+        ax: The axes to draw on.
+        nd_rank: Mapping from dataset name to the rank array of its configurations.
+        percentile: The percentage of configurations treated as "top".
+        set_ylabel: Whether to label the y-axis.
+    """
+    colors = ["red", "blue", "green", "purple"]
+    lang = {"so": "Somali", "sw": "Swahili", "tl": "Tagalog", "en": "English"}
+    # Exactly four entries selects the capitalized-words labels; any other count
+    # makes the keys be read as "<code>_<code>" language pairs.
+    use_capitalized_names = len(nd_rank) == 4
+
+    for i, (name, ranks) in enumerate(nd_rank.items()):
+        n_configs = ranks.size
+        n_top = int(n_configs * percentile / 100)
+        top_indices = np.argsort(ranks)[:n_top]
+        # Indicator vector over config indices: 1 where the config is in the top set.
+        is_top = np.zeros(n_configs)
+        is_top[top_indices] = 1
+        if use_capitalized_names:
+            dataset_name = " ".join([s.capitalize() for s in name.split("_")])
+        else:
+            dataset_name = " to ".join([lang[s] for s in name.split("_")])
+        # Cycle through the palette so that more than four series do not raise IndexError.
+        color = colors[i % len(colors)]
+        ax.plot(np.arange(n_configs), np.cumsum(is_top), label=dataset_name, color=color)
+
+    title = f"Cumulated count of Top-{percentile}% configuration"
+    ax.set_title(title)
+    ax.set_xlabel("Config indices")
+
+    if set_ylabel:
+        ax.set_ylabel("Cumulated count")
+
+    ax.legend()
+    ax.grid()
+
+
+if __name__ == "__main__":
+    # Script entry point: draw the HPOlib and NMT panels side by side and save them
+    # to a single PDF.
+    import os
+
+    # Each percentile is used for both the data call and the plot title of its panel.
+    hpolib_percentile, nmt_percentile = 1, 5
+
+    _, axes = plt.subplots(
+        figsize=(20, 5),
+        ncols=2,
+        gridspec_kw={"wspace": 0.1},
+    )
+    nd_rank = get_nd_rank_for_hpolib(percentile=hpolib_percentile)
+    plot_cum(axes[0], nd_rank, percentile=hpolib_percentile, set_ylabel=True)
+
+    nd_rank = get_nd_rank_for_nmt(percentile=nmt_percentile)
+    plot_cum(axes[1], nd_rank, percentile=nmt_percentile, set_ylabel=False)
+
+    # savefig does not create missing directories, so make sure the target exists first.
+    os.makedirs("figs", exist_ok=True)
+    plt.savefig("figs/dataset-dist.pdf", bbox_inches="tight")