Multi-fidelity tabular benchmark compatible additions #112

Closed · wants to merge 53 commits
Changes from all commits (53 commits)
5dad061
init 0.0.8 (#96)
KEggensperger Mar 26, 2021
e0cb0fe
[skip ci] Suppress scikit-learn warnings (#98)
PhMueller Apr 14, 2021
76f712c
Fix99 paramnet reduced space (#100)
PhMueller Apr 21, 2021
4bb22d5
Surrogates SVM Benchmark (#102)
PhMueller Apr 28, 2021
8029ed4
Collection of small improvements (#103)
PhMueller Apr 28, 2021
9fdffe8
Fix93: Improve Container Integration (#97)
PhMueller Apr 28, 2021
b2155dd
Adding sample RF space for tabular collection design
Neeratyoy Jun 23, 2021
ce405e6
Placeholder SVM benchmark to interface tabular data collection
Neeratyoy Jun 23, 2021
2ef3af8
Writing common ML benchmark class for tabular collection
Neeratyoy Jun 24, 2021
61b6963
Adding placeholder for HistGradientBoostedClassifier
Neeratyoy Jun 24, 2021
a5d0217
Minor code cleaning
Neeratyoy Jun 24, 2021
3def203
Reformatting output dict + option to add more metrics
Neeratyoy Jun 26, 2021
750cc7d
Removing redundant import
Neeratyoy Jun 28, 2021
e7665e6
Decoupling storage of costs for each metric
Neeratyoy Jun 30, 2021
47fe4cd
Including test scores in objective
Neeratyoy Jul 1, 2021
2d085ec
Documenting the structure of information in each fn eval.
Neeratyoy Jul 1, 2021
2da9d5c
Some decisions on lower bound for subsample fidelity
Neeratyoy Jul 2, 2021
751d2e9
AbstractBenchmark update for fidelity option + including XGBoost
Neeratyoy Jul 6, 2021
3f84afb
Adding sample RF space for tabular collection design
Neeratyoy Jun 23, 2021
09b296a
Placeholder SVM benchmark to interface tabular data collection
Neeratyoy Jun 23, 2021
af4f593
Writing common ML benchmark class for tabular collection
Neeratyoy Jun 24, 2021
df2462d
Adding placeholder for HistGradientBoostedClassifier
Neeratyoy Jun 24, 2021
4d1d2d6
Minor code cleaning
Neeratyoy Jun 24, 2021
299e592
Reformatting output dict + option to add more metrics
Neeratyoy Jun 26, 2021
c46321d
Removing redundant import
Neeratyoy Jun 28, 2021
17f6634
Decoupling storage of costs for each metric
Neeratyoy Jun 30, 2021
7de891f
Including test scores in objective
Neeratyoy Jul 1, 2021
ec316c3
Documenting the structure of information in each fn eval.
Neeratyoy Jul 1, 2021
e7f69b9
Some decisions on lower bound for subsample fidelity
Neeratyoy Jul 2, 2021
edb3e7f
AbstractBenchmark update for fidelity option + including XGBoost
Neeratyoy Jul 6, 2021
642027b
Merge branch 'thesis-paper' of https://github.com/Neeratyoy/HPOBench …
Neeratyoy Jul 7, 2021
9e907e6
Option to load data splits from disk
Neeratyoy Jul 8, 2021
f0d4f36
Reordering data load to work for different cases
Neeratyoy Jul 12, 2021
dbeae7c
Updating source of SVM HP range
Neeratyoy Jul 14, 2021
f277a2e
Adding Tabular Benchmark class
Neeratyoy Jul 14, 2021
60d5646
Adding TabularBenchmark interface + easy import
Neeratyoy Jul 15, 2021
c4100fd
Adding LR space
Neeratyoy Jul 16, 2021
9c6dcdb
Standardizing fidelity space definitions
Neeratyoy Jul 19, 2021
74b6919
Standardizing HPs + Adding NN space
Neeratyoy Jul 19, 2021
785055e
Small placeholder for testing
Neeratyoy Jul 19, 2021
0159a35
Updating NN HP space + Helper function for TabularBenchmark
Neeratyoy Jul 20, 2021
e9e097a
Adding fidelity range retrieval utility to TabularBenchmark
Neeratyoy Jul 20, 2021
4797109
Enforcing subsample lower bound check inside objective
Neeratyoy Jul 21, 2021
dbb7327
Bug fix + adding precision as metric
Neeratyoy Jul 21, 2021
7d5ca57
Fixing param spaces and model building for LR, SVM
Neeratyoy Jul 22, 2021
a6d94bb
TabularBenchmark edit to read compressed files and query a dataframe
Neeratyoy Jul 26, 2021
93b6908
Not evaluating training set to save time
Neeratyoy Jul 27, 2021
8164eb0
Fidelity change for trees + NN space change
Neeratyoy Jul 27, 2021
6916c9c
Final RF space
Neeratyoy Jul 29, 2021
8e5912b
Final XGB space
Neeratyoy Jul 29, 2021
6968ac3
Final HistGB space
Neeratyoy Jul 30, 2021
79dd1f3
Finalizing RF, XGB, NN
Neeratyoy Aug 2, 2021
ca1e0d4
TabularBenchmark edit to process only table and metadata
Neeratyoy Aug 2, 2021
57 changes: 57 additions & 0 deletions .github/workflows/run_singularity_versions.yml
@@ -0,0 +1,57 @@
name: Test Support for different Singularity Versions

on: [push]

jobs:
  Tests:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        include:
          - python-version: 3.7
            DISPLAY_NAME: "Singularity Container Examples with S3.5"
            RUN_CONTAINER_EXAMPLES: true
            USE_SINGULARITY: false
            SINGULARITY_VERSION: "3.5"
          - python-version: 3.7
            DISPLAY_NAME: "Singularity Container Examples with S3.6"
            RUN_CONTAINER_EXAMPLES: true
            USE_SINGULARITY: false
            SINGULARITY_VERSION: "3.6"
          - python-version: 3.7
            DISPLAY_NAME: "Singularity Container Examples with S3.7"
            RUN_CONTAINER_EXAMPLES: true
            USE_SINGULARITY: false
            SINGULARITY_VERSION: "3.7"

      fail-fast: false

    name: Tests ${{ matrix.python-version }} ${{ matrix.DISPLAY_NAME }}

    env:
      RUN_TESTS: ${{ matrix.RUN_TESTS }}
      USE_SINGULARITY: ${{ matrix.USE_SINGULARITY }}
      RUN_CODECOV: ${{ matrix.RUN_CODECOV }}
      RUN_CODESTYLE: ${{ matrix.RUN_CODESTYLE }}
      RUN_CONTAINER_EXAMPLES: ${{ matrix.RUN_CONTAINER_EXAMPLES }}
      RUN_LOCAL_EXAMPLES: ${{ matrix.RUN_LOCAL_EXAMPLES }}
      SINGULARITY_VERSION: ${{ matrix.SINGULARITY_VERSION }}

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up Go for Singularity
        if: matrix.USE_SINGULARITY == true
        uses: actions/setup-go@v2
        with:
          go-version: '1.14.15'  # The Go version to download (if necessary) and use.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          chmod +x ci_scripts/install_singularity.sh && source ./ci_scripts/install_singularity.sh
      - name: Run Tests
        run: chmod +x ci_scripts/script.sh && source ./ci_scripts/script.sh
17 changes: 13 additions & 4 deletions README.md
@@ -35,11 +35,14 @@ Further requirements are: [ConfigSpace](https://github.com/automl/ConfigSpace),
This can be arbitrarily complex and further information can be found in the docstring of the benchmark.

A simple example is the XGBoost benchmark, which can be installed with `pip install .[xgboost]`

```python
from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark
from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark

b = XGBoostBenchmark(task_id=167149)
config = b.get_configuration_space(seed=1).sample_configuration()
result_dict = b.objective_function(configuration=config, fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1)
result_dict = b.objective_function(configuration=config,
fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1)

```

@@ -71,6 +74,7 @@ pip install .
| NASBench1shot1SearchSpace*Benchmark | nasbench_1shot1 | Loading may take several minutes. There are 3 benchmarks in total (1,2,3) |
| ParamNet*OnStepsBenchmark | paramnet | There are 6 benchmarks in total (Adult, Higgs, Letter, Mnist, Optdigits, Poker) |
| ParamNet*OnTimeBenchmark | paramnet | There are 6 benchmarks in total (Adult, Higgs, Letter, Mnist, Optdigits, Poker) |
| SurrogateSVMBenchmark | surrogate_svm | Random Forest Surrogate of a SVM on MNIST |
| Learna⁺ | learna_benchmark | Not deterministic. |
| MetaLearna⁺ | learna_benchmark | Not deterministic. |
| XGBoostBenchmark⁺ | xgboost_benchmark | Works with OpenML task ids. |
@@ -88,8 +92,8 @@ pip install .
All of HPOBench's settings are stored in a file, the `hpobenchrc`-file.
It is a yaml file, which is automatically generated at the first use of HPOBench.
By default, it is placed in `$XDG_CONFIG_HOME`. If `$XDG_CONFIG_HOME` is not set, then the
`hpobenchrc`-file is saved to `'~/.config/hpobench'`.
Make sure to have write permissions in this directory.
`hpobenchrc`-file is saved to `'~/.config/hpobench'`. When using the containerized benchmarks, the Unix socket is
defined via `$TEMP_DIR`, which defaults to `/tmp`. Make sure to have write permissions in those directories.
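
To locate the active configuration quickly, a minimal sketch (assuming the `config_file` object re-exported in `hpobench/__init__.py`, shown further below, carries the parsed `hpobenchrc` settings):

```python
import hpobench

# `config_file` is re-exported at package level (see hpobench/__init__.py below).
# Assumption: printing it reveals the resolved hpobenchrc settings and paths.
print(hpobench.config_file)
```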

In the `hpobenchrc`, you can specify for example the directory, in that the benchmark-containers are
downloaded. We encourage you to take a look into the `hpobenchrc`, to find out more about all
@@ -136,6 +140,11 @@ OpenML data additionally maintains its own cache, which is located at `~/.openml`

Singularity additionally maintains its own cache, which can be removed with `singularity cache clean`

#### Use HPOBench benchmarks

If you use a benchmark in your experiments, please specify the version number of HPOBench as well as the version of
the container used. When an experiment starts, HPOBench automatically writes both version numbers to the log.
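
For reporting, the installed HPOBench version can also be read programmatically (the container version is taken from the log output); a small sketch:

```python
import hpobench

# __version__ comes from hpobench/__version__.py (bumped to '0.0.8dev' in this PR)
print(hpobench.__version__)
```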

### Troubleshooting

- **Singularity throws an 'Invalid Image format' exception**
13 changes: 12 additions & 1 deletion changelog.md
@@ -1,3 +1,15 @@
# 0.0.8
* Improve container integration.
  The containers had some problems when the file system was read-only. In this case, the home directory, which contains the
  hpobenchrc file, was not mounted, so the initialization of the containers failed. To circumvent this problem, we made multiple
  changes:
  * We introduce an option to mount additional folders. This might be helpful when working on a cluster where the home
    directory is not available to the compute process.
  * We also change the configuration file. The container does not read the yaml file anymore. Instead, we bind the
    cache dir, data dir, and socket dir into the container and let the container use them directly. We also drop the
    global data directory and use only the data dir from now on.
* Add the surrogate SVM on MNIST benchmark from the BOHB paper.

# 0.0.7
* Fix an error in the NASBench1shot1 Benchmark (SearchSpace3).
* Improve the behavior when a benchmark container is shut down.
@@ -30,7 +42,6 @@
* Nas1shot1 and Nas101 now take a seed as an input parameter.
* The config file is now based on yaml. Also, it automatically raises a warning if the configuration file-version
does not match the HPOBench-version.


# 0.0.5
* Rename package to HPOBench
35 changes: 35 additions & 0 deletions ci_scripts/install_singularity.sh
@@ -0,0 +1,35 @@
#!/usr/bin/env bash

echo "Install Singularity"

sudo apt-get update && sudo apt-get install -y \
build-essential \
libssl-dev \
uuid-dev \
libgpgme11-dev \
squashfs-tools \
libseccomp-dev \
wget \
pkg-config \
git \
cryptsetup

if [[ "$SINGULARITY_VERSION" == "3.5" ]]; then
export VERSION=3.5.3
elif [[ "$SINGULARITY_VERSION" == "3.6" ]]; then
export VERSION=3.6.4
elif [[ "$SINGULARITY_VERSION" == "3.7" ]]; then
export VERSION=3.7.3
else
echo "Skip installing Singularity"
fi

wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz && \
tar -xzf singularity-${VERSION}.tar.gz && \
cd ./singularity && \
./mconfig && \
make -C builddir && \
sudo make -C builddir install

cd ..
pip install .[singularity]
2 changes: 1 addition & 1 deletion examples/local/xgboost_local.py
@@ -10,7 +10,7 @@
import argparse
from time import time

from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark
from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark
from hpobench.util.openml_data_manager import get_openmlcc18_taskids


12 changes: 9 additions & 3 deletions hpobench/__init__.py
@@ -1,4 +1,10 @@
__contact__ = "automl.org"
import logging

_default_log_format = '[%(levelname)s] %(name)s at %(asctime)s --- %(message)s'
logging.basicConfig(format=_default_log_format, level=logging.WARNING)
root_logger = logging.getLogger()

from hpobench.__version__ import __version__ # noqa: F401
from hpobench.config import config_file # noqa: F401
from hpobench.__version__ import __version__ # noqa: F401, E402
from hpobench.config import config_file # noqa: F401, E402

__contact__ = "automl.org"
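
Since importing `hpobench` now configures the root logger (WARNING level, format above), downstream code can adjust verbosity after the import. A minimal sketch, assuming HPOBench modules create their loggers under the `hpobench` namespace via `logging.getLogger(__name__)`:

```python
import logging

import hpobench  # installs the default log format and WARNING level on import

# Raise verbosity for HPOBench only, leaving other libraries untouched.
logging.getLogger('hpobench').setLevel(logging.DEBUG)
```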
2 changes: 1 addition & 1 deletion hpobench/__version__.py
@@ -1 +1 @@
__version__ = '0.0.7'
__version__ = '0.0.8dev'
7 changes: 6 additions & 1 deletion hpobench/abstract_benchmark.py
@@ -226,12 +226,17 @@ def get_configuration_space(seed: Union[int, None] = None) -> ConfigSpace.Config

    @staticmethod
    @abc.abstractmethod
    def get_fidelity_space(seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace:
    def get_fidelity_space(
            seed: Union[int, None] = None, fidelity_choice: Union[int, None] = None
    ) -> ConfigSpace.ConfigurationSpace:
        """ Defines the available fidelity parameters as a "fidelity space" for each benchmark.

        Parameters
        ----------
        seed: int, None
            Seed for the fidelity space.
        fidelity_choice: int, None
            Integer value to choose the type of fidelity space.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
29 changes: 29 additions & 0 deletions hpobench/benchmarks/ml/README.md
@@ -0,0 +1,29 @@
Each function evaluation returns a dictionary with the following information:

```
└───function_value: 1 - accuracy (acc.) on validation set
└───cost: time to fit model + time to evaluate acc. on training set + time to evaluate acc. on validation set
└───info: dictionary (dict) with miscellaneous information
| └───train_loss: 1 - accuracy (acc.) on training set
| └───val_loss: 1 - accuracy (acc.) on validation set
| └───model_cost: time taken to fit the model
| └───train_scores: performance on all metrics over the training set (dict)
| | └───f1: F1-score
| | └───acc: Accuracy
| | └───bal_acc: Balanced accuracy
| └───train_costs: time taken to compute each metric over the training set (dict)
| | └───f1: time to compute the F1-score
| | └───acc: time to compute the accuracy
| | └───bal_acc: time to compute the balanced accuracy
| └───valid_scores: performance on all metrics over the validation set (dict)
| | └───...
| └───valid_costs: time taken to compute each metric over the validation set (dict)
| | └───...
| └───test_scores: performance on all metrics over the test set (dict)
| | └───...
| └───test_costs: time taken to compute each metric over the test set (dict)
| | └───...
```

*NOTE*: the keys `function_value`, `cost`, and `info` must exist when creating a new objective
function, while `info` can hold any kind of auxiliary information required.
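
To illustrate how this structure is consumed, a short sketch; `benchmark`, `config`, and `fidelity` are placeholders for any benchmark built on the common ML benchmark class and samples from its spaces:

```python
result = benchmark.objective_function(configuration=config, fidelity=fidelity)

val_loss = result["function_value"]                      # 1 - validation accuracy
total_cost = result["cost"]                              # fit time + scoring time
model_cost = result["info"]["model_cost"]                # fit time alone
test_bal_acc = result["info"]["test_scores"]["bal_acc"]  # balanced accuracy on test
```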
7 changes: 7 additions & 0 deletions hpobench/benchmarks/ml/__init__.py
@@ -0,0 +1,7 @@
from .tabular_benchmark import TabularBenchmark
from .svm_benchmark import SVMBenchmark
from .rf_benchmark import RandomForestBenchmark
from .xgboost_benchmark import XGBoostBenchmark
from .histgb_benchmark import HistGBBenchmark
from .lr_benchmark import LRBenchmark
from .nn_benchmark import NNBenchmark
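
A hedged usage sketch of these re-exports; the constructor arguments mirror the `MLBenchmark` subclasses such as `HistGBBenchmark` below, and the OpenML task id is reused from the README example:

```python
from hpobench.benchmarks.ml import HistGBBenchmark

# fidelity_choice selects which fidelities are variable (see get_fidelity_space below)
benchmark = HistGBBenchmark(task_id=167149, seed=1, fidelity_choice=1)
```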
102 changes: 102 additions & 0 deletions hpobench/benchmarks/ml/histgb_benchmark.py
@@ -0,0 +1,102 @@
import numpy as np
import ConfigSpace as CS
from copy import deepcopy
from typing import Union

# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier

from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark


class HistGBBenchmark(MLBenchmark):
    def __init__(
            self,
            task_id: Union[int, None] = None,
            seed: Union[int, None] = None,  # Union[np.random.RandomState, int, None] = None,
            valid_size: float = 0.33,
            fidelity_choice: int = 1,
            data_path: Union[str, None] = None
    ):
        super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path)

    @staticmethod
    def get_configuration_space(seed=None):
        """Parameter space to be optimized --- contains the hyperparameters
        """
        cs = CS.ConfigurationSpace(seed=seed)

        # Names must match the scikit-learn keyword arguments, since `init_model`
        # unpacks the configuration directly into HistGradientBoostingClassifier.
        cs.add_hyperparameters([
            CS.UniformIntegerHyperparameter(
                'max_depth', lower=6, upper=30, default_value=6, log=True
            ),
            CS.UniformIntegerHyperparameter(
                'max_leaf_nodes', lower=2, upper=64, default_value=32, log=True
            ),
            CS.UniformFloatHyperparameter(
                'learning_rate', lower=2**-10, upper=1, default_value=0.1, log=True
            ),
            CS.UniformFloatHyperparameter(
                'l2_regularization', lower=2**-10, upper=2**10, default_value=0.1, log=True
            )
        ])
        return cs

    @staticmethod
    def get_fidelity_space(seed=None, fidelity_choice=1):
        """Fidelity space available --- specifies the fidelity dimensions

        If fidelity_choice is 0
            Fidelity space is the maximal fidelity, akin to a black-box function
        If fidelity_choice is 1
            Fidelity space is a single fidelity, in this case the number of trees (n_estimators)
        If fidelity_choice is 2
            Fidelity space is a single fidelity, in this case the fraction of dataset (subsample)
        If fidelity_choice is >2
            Fidelity space is multi-multi-fidelity, all possible fidelities
        """
        z_cs = CS.ConfigurationSpace(seed=seed)
        fidelity1 = dict(
            fixed=CS.Constant('n_estimators', value=100),
            variable=CS.UniformIntegerHyperparameter(
                'n_estimators', lower=100, upper=1000, default_value=1000, log=False
            )
        )
        fidelity2 = dict(
            fixed=CS.Constant('subsample', value=1),
            variable=CS.UniformFloatHyperparameter(
                'subsample', lower=0.1, upper=1, default_value=1, log=False
            )
        )
        if fidelity_choice == 0:
            # black-box setting (full fidelity)
            ntrees = fidelity1["fixed"]
            subsample = fidelity2["fixed"]
        elif fidelity_choice == 1:
            # gray-box setting (multi-fidelity) - ntrees
            ntrees = fidelity1["variable"]
            subsample = fidelity2["fixed"]
        elif fidelity_choice == 2:
            # gray-box setting (multi-fidelity) - data subsample
            ntrees = fidelity1["fixed"]
            subsample = fidelity2["variable"]
        else:
            # gray-box setting (multi-multi-fidelity) - ntrees + data subsample
            ntrees = fidelity1["variable"]
            subsample = fidelity2["variable"]
        z_cs.add_hyperparameters([ntrees, subsample])
        return z_cs

    def init_model(self, config, fidelity=None, rng=None):
        """ Function that returns the model initialized based on the configuration and fidelity
        """
        rng = self.rng if rng is None else rng
        model = HistGradientBoostingClassifier(
            **config.get_dictionary(),
            max_iter=fidelity['n_estimators'],  # a fidelity being used during initialization
            early_stopping=False,
            random_state=rng
        )
        return model
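
A short sketch of how `fidelity_choice` changes the fidelity space of the class above; only the static methods are called, so no dataset is required:

```python
from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark

cs = HistGBBenchmark.get_configuration_space(seed=1)
config = cs.sample_configuration()

# 0: both fidelities fixed (black-box); 1: n_estimators variable;
# 2: subsample variable; >2: both variable (multi-multi-fidelity).
fs = HistGBBenchmark.get_fidelity_space(seed=1, fidelity_choice=3)
fidelity = fs.sample_configuration()
print(fidelity)  # contains both 'n_estimators' and 'subsample'
```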