From b2155ddb062a270b11f4d9c63724200e03827040 Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Wed, 23 Jun 2021 20:32:38 +0200
Subject: [PATCH 01/95] Adding sample RF space for tabular collection design

---
 hpobench/benchmarks/ml/rf_benchmark.py | 391 +++++++++++++++++++++++++
 1 file changed, 391 insertions(+)
 create mode 100644 hpobench/benchmarks/ml/rf_benchmark.py

diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py
new file mode 100644
index 00000000..35684c00
--- /dev/null
+++ b/hpobench/benchmarks/ml/rf_benchmark.py
@@ -0,0 +1,391 @@
+import time
+import openml
+import numpy as np
+import pandas as pd
+import ConfigSpace as CS
+from typing import Union, Dict
+
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import make_pipeline
+from sklearn.utils import check_random_state
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, make_scorer
+
+import hpobench.util.rng_helper as rng_helper
+from hpobench.abstract_benchmark import AbstractBenchmark
+
+
+class RandomForestBenchmark(AbstractBenchmark):
+    _issue_tasks = [3917, 3945]
+
+    def __init__(
+            self,
+            task_id: Union[int, None] = None,
+            seed: Union[int, None] = None,  # Union[np.random.RandomState, int, None] = None,
+            valid_size: float = 0.33,
+            fidelity_choice: int = 1,
+            benchmark_type: str = "raw"
+    ):
+        self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6)
+        self.rng = check_random_state(self.seed)
+        super(RandomForestBenchmark, self).__init__(rng=seed)
+
+        self.benchmark_type = benchmark_type
+        self.task_id = task_id
+        self.valid_size = valid_size
+        self.accuracy_scorer = make_scorer(accuracy_score)
+
+        # Data variables
+        self.train_X = None
+        self.valid_X = None
+        self.test_X = None
+        self.train_y = None
+        self.valid_y = None
+        self.test_y = None
+        self.train_idx = None
+        self.test_idx = None
+        self.task = None
+        self.dataset = None
+        self.preprocessor = None
+        self.lower_bound_train_size = None
+        self.load_data_from_openml()
+
+        # Observation and fidelity spaces
+        self.fidelity_choice = fidelity_choice
+        self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice)
+        self.x_cs = self.get_configuration_space(self.seed)
+
+    @staticmethod
+    def get_configuration_space(seed=None):
+        """Parameter space to be optimized --- contains the hyperparameters
+        """
+        cs = CS.ConfigurationSpace(seed=seed)
+
+        cs.add_hyperparameters([
+            CS.UniformIntegerHyperparameter(
+                'max_depth', lower=1, upper=15, default_value=2, log=False
+            ),
+            CS.UniformIntegerHyperparameter(
+                'min_samples_split', lower=2, upper=128, default_value=2, log=True
+            ),
+            CS.UniformFloatHyperparameter(
+                'max_features', lower=0.1, upper=0.9, default_value=0.5, log=False
+            ),
+            CS.UniformIntegerHyperparameter(
+                'min_samples_leaf', lower=1, upper=64, default_value=1, log=True
+            ),
+        ])
+        return cs
+
+    @staticmethod
+    def get_fidelity_space(seed=None, fidelity_choice=1):
+        """Fidelity space available --- specifies the fidelity dimensions
+
+        If fidelity_choice is 0
+            Fidelity space is the maximal fidelity, akin to a black-box function
+        If fidelity_choice is 1
+            Fidelity space is a single fidelity, in this case the number of trees (n_estimators)
+        If fidelity_choice is 2
+            Fidelity space is a single fidelity, in this case the fraction of dataset 
(subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed till this function is called again or explicitly altered + valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + 
train_x, train_y, test_size=valid_size, + shuffle=True, stratify=train_y, random_state=self.rng + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + + # preprocessor fit only on the training set + self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 times the number of classes as lower bound for the dataset fraction + n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + + if verbose: + print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") + + if verbose: + print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) + print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) + print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) + print("\nData loading complete!\n") + return + + def shuffle_data_idx(self, train_id=None, ng=None): + rng = self.rng if rng is None else rng + train_idx = self.train_idx if train_idx is None else train_idx + rng.shuffle(train_idx) + return train_idx + + def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + start = time.time() + + # initializing model + model = RandomForestClassifier( + **config.get_dictionary(), + n_estimators=fidelity['n_estimators'], # a fidelity being used during initialization + bootstrap=True, + random_state=self.rng + ) + + # preparing data + if eval == "valid": + train_X = self.train_X + train_y = self.train_y + train_idx = self.train_idx + else: + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + train_idx = np.arange(len(train_X)) + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here + # application of the other fidelity to the dataset that the model interfaces + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + fidelity['subsample'] * len(train_X) + ) + ) + # fitting the model with subsampled data + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + # computing statistics on training data + train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + + model_fit_time = time.time() - start + return model, model_fit_time, train_loss + + def objective( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that 
evaluates a 'config' on a 'fidelity' on the validation set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + def objective_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass From ce405e6bc43ea926c3786e1564a1e9b61d3754a3 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 23 Jun 2021 20:57:37 +0200 Subject: [PATCH 02/95] Placeholder SVM benchmark to interface tabular data collection --- hpobench/benchmarks/ml/svm_benchmark_2.py | 371 ++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 hpobench/benchmarks/ml/svm_benchmark_2.py diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py new file mode 100644 index 00000000..6e8ec6c9 --- /dev/null +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -0,0 +1,371 @@ +import time +import openml +import numpy as np +import pandas as pd +import 
ConfigSpace as CS +from typing import Union, Dict + +from sklearn.svm import SVC +from sklearn.impute import SimpleImputer +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline, Pipeline +from sklearn.metrics import accuracy_score, make_scorer + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark + + +class SVMBenchmark(AbstractBenchmark): + _issue_tasks = [3917, 3945] + + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = check_random_state(self.seed) + super(SVMBenchmark, self).__init__(rng=seed) + + self.benchmark_type = benchmark_type + self.task_id = task_id + self.valid_size = valid_size + self.accuracy_scorer = make_scorer(accuracy_score) + #TODO: check the cache_size parameter from sklearn docs + self.cache_size = 200 + + # Data variables + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.load_data_from_openml() + + # Observation and fidelity spaces + self.fidelity_choice = fidelity_choice + self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) + self.x_cs = self.get_configuration_space(self.seed) + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter( + 'C', lower=-10., upper=10., default_value=0., log=False + ), + CS.UniformFloatHyperparameter( + 'gamma', lower=-10., upper=10., default_value=1., log=False + ), + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=None): + """Fidelity space available --- specifies the fidelity dimensions + + For SVM, only a single fidelity exists, i.e., subsample fraction. 
+ if fidelity_choice == 0 + uses the entire data (subsample=1), reflecting the black-box setup + else + parameterizes the fraction of data to subsample + + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + subsample = CS.Constant('subsample', value=1) + else: + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameter(subsample) + return z_cs + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed till this function is called again or explicitly altered + valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + train_x, train_y, test_size=valid_size, + shuffle=True, stratify=train_y, random_state=self.rng + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + + # preprocessor fit only on the training set + 
self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 times the number of classes as lower bound for the dataset fraction + n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + + if verbose: + print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") + + if verbose: + print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) + print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) + print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) + print("\nData loading complete!\n") + return + + def shuffle_data_idx(self, train_id=None, ng=None): + rng = self.rng if rng is None else rng + train_idx = self.train_idx if train_idx is None else train_idx + rng.shuffle(train_idx) + return train_idx + + def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + start = time.time() + + # initializing model + rng = self.rng if rng is None else rng + config = config.get_dictionary() + for k, v in config.items(): + config[k] = np.exp(float(v)) + model = SVC( + **config, + random_state=rng, + cache_size=self.cache_size + + ) + + # preparing data + if eval == "valid": + train_X = self.train_X + train_y = self.train_y + train_idx = self.train_idx + else: + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + train_idx = np.arange(len(train_X)) + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here + # application of the other fidelity to the dataset that the model interfaces + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + fidelity['subsample'] * len(train_X) + ) + ) + # fitting the model with subsampled data + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + # computing statistics on training data + train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + + model_fit_time = time.time() - start + return model, model_fit_time, train_loss + + def objective( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 
'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + def objective_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass From 2ef3af8019bf6ab2531610b8299f57bcb1148ef7 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 14:38:56 +0200 Subject: [PATCH 03/95] Writing common ML benchmark class for tabular collection --- .../benchmarks/ml/ml_benchmark_template.py | 347 ++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 300 +-------------- hpobench/benchmarks/ml/svm_benchmark_2.py | 315 +--------------- 3 files changed, 376 insertions(+), 586 deletions(-) create mode 100644 hpobench/benchmarks/ml/ml_benchmark_template.py diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py new file mode 100644 index 00000000..0891f0fe --- /dev/null +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -0,0 +1,347 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer + +import hpobench.util.rng_helper as rng_helper +from 
hpobench.abstract_benchmark import AbstractBenchmark + + +class Benchmark(AbstractBenchmark): + _issue_tasks = [3917, 3945] + + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = check_random_state(self.seed) + super(Benchmark, self).__init__(rng=seed) + + self.benchmark_type = benchmark_type + self.task_id = task_id + self.valid_size = valid_size + self.accuracy_scorer = make_scorer(accuracy_score) + + # Data variables + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.load_data_from_openml() + + # Observation and fidelity spaces + self.fidelity_choice = fidelity_choice + self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) + self.x_cs = self.get_configuration_space(self.seed) + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + raise NotImplementedError() + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + raise NotImplementedError() + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + 
categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed till this function is called again or explicitly altered + valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + train_x, train_y, test_size=valid_size, + shuffle=True, stratify=train_y, random_state=self.rng + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + + # preprocessor fit only on the training set + self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 times the number of classes as lower bound for the dataset fraction + n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + + if verbose: + print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") + + if verbose: + print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) + print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) + print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) + print("\nData loading complete!\n") + return + + def shuffle_data_idx(self, train_id=None, ng=None): + rng = self.rng if rng is None else rng + train_idx = self.train_idx if train_idx is None else train_idx + rng.shuffle(train_idx) + return train_idx + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity + """ + raise NotImplementedError() + + def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + start = time.time() + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if eval == "valid": + train_X = self.train_X + train_y = self.train_y + train_idx = self.train_idx + else: + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + train_idx = np.arange(len(train_X)) + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + train_X = train_X.iloc[train_idx] + 
train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + fidelity['subsample'] * len(train_X) + ) + ) + # fitting the model with subsampled data + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + # computing statistics on training data + train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + + model_fit_time = time.time() - start + return model, model_fit_time, train_loss + + def objective( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + def objective_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return 
dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 35684c00..be08b938 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -17,11 +17,10 @@ import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark -class RandomForestBenchmark(AbstractBenchmark): - _issue_tasks = [3917, 3945] - +class RandomForestBenchmark(Benchmark): def __init__( self, task_id: Union[int, None] = None, @@ -30,34 +29,10 @@ def __init__( fidelity_choice: int = 1, benchmark_type: str = "raw" ): - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = check_random_state(self.seed) - super(RandomForestBenchmark, self).__init__(rng=seed) - - self.benchmark_type = benchmark_type - self.task_id = task_id - self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) - - # Data variables - self.train_X = None - self.valid_X = None - self.test_X = None - self.train_y = None - self.valid_y = None - self.test_y = None - self.train_idx = None - self.test_idx = None - self.task = None - self.dataset = None - self.preprocessor = None - self.lower_bound_train_size = None - self.load_data_from_openml() - - # Observation and fidelity spaces - self.fidelity_choice = fidelity_choice - self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) - self.x_cs = self.get_configuration_space(self.seed) + super(RandomForestBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) + pass @staticmethod def get_configuration_space(seed=None): @@ -123,269 +98,14 @@ def get_fidelity_space(seed=None, fidelity_choice=1): z_cs.add_hyperparameters([ntrees, subsample]) return z_cs - def get_config(self, size=None): - """Samples configuration(s) from the (hyper) parameter space - """ - if size is None: # return only one config - return self.x_cs.sample_configuration() - return [self.x_cs.sample_configuration() for i in range(size)] - - def get_fidelity(self, size=None): - """Samples candidate fidelities from the fidelity space + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity """ - if size is None: # return only one config - return self.z_cs.sample_configuration() - return [self.z_cs.sample_configuration() for i in range(size)] - - def _convert_labels(self, labels): - """Converts boolean labels (if exists) to strings - """ - label_types = list(map(lambda x: isinstance(x, bool), labels)) - if np.all(label_types): - _labels = list(map(lambda x: str(x), labels)) - if isinstance(labels, pd.Series): - labels = pd.Series(_labels, index=labels.index) - elif isinstance(labels, np.array): - labels = np.array(labels) - return labels - - def load_data_from_openml(self, valid_size=None, verbose=False): - """Fetches data from OpenML and initializes the train-validation-test data splits - - The validation set is fixed till this function is called again or explicitly altered - """ - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - - # loads full data - X, y, 
categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset - self.train_idx, self.test_idx = self.task.get_train_test_split_indices() - train_x = X.iloc[self.train_idx] - train_y = y.iloc[self.train_idx] - self.test_X = X.iloc[self.test_idx] - self.test_y = y.iloc[self.test_idx] - - # splitting training into training and validation - # validation set is fixed till this function is called again or explicitly altered - valid_size = self.valid_size if valid_size is None else valid_size - self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=self.rng - ) - - # preprocessor to handle missing values, categorical columns encodings, - # and scaling numeric columns - self.preprocessor = make_pipeline( - ColumnTransformer([ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), - OneHotEncoder(sparse=False, handle_unknown="ignore")), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), - StandardScaler()), - cont_idx.tolist(), - ) - ]) - ) - if verbose: - print("Shape of data pre-preprocessing: {}".format(train_X.shape)) - - # preprocessor fit only on the training set - self.train_X = self.preprocessor.fit_transform(self.train_X) - # applying preprocessor built on the training set, across validation and test splits - self.valid_X = self.preprocessor.transform(self.valid_X) - self.test_X = self.preprocessor.transform(self.test_X) - # converting boolean labels to strings - self.train_y = self._convert_labels(self.train_y) - self.valid_y = self._convert_labels(self.valid_y) - self.test_y = self._convert_labels(self.test_y) - - # Similar to (https://arxiv.org/pdf/1605.07079.pdf) - # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] - - if verbose: - print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") - - if verbose: - print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) - print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) - print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) - print("\nData loading complete!\n") - return - - def shuffle_data_idx(self, train_id=None, ng=None): rng = self.rng if rng is None else rng - train_idx = self.train_idx if train_idx is None else train_idx - rng.shuffle(train_idx) - return train_idx - - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - - # initializing model model = RandomForestClassifier( **config.get_dictionary(), n_estimators=fidelity['n_estimators'], # a fidelity being used during initialization bootstrap=True, - random_state=self.rng - ) - - # preparing data - if eval == "valid": - train_X = self.train_X - train_y = self.train_y - train_idx = self.train_idx - else: - train_X = np.vstack((self.train_X, self.valid_X)) - train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) - - # shuffling data - if shuffle: - train_idx = 
self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] - train_y = train_y.iloc[train_idx] - - # subsample here - # application of the other fidelity to the dataset that the model interfaces - train_idx = self.rng.choice( - np.arange(len(train_X)), size=int( - fidelity['subsample'] * len(train_X) - ) + random_state=rng ) - # fitting the model with subsampled data - model.fit(train_X[train_idx], train_y.iloc[train_idx]) - # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss - - def objective( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - def objective_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> 
Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - pass + return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 6e8ec6c9..13076040 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -17,11 +17,10 @@ import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark -class SVMBenchmark(AbstractBenchmark): - _issue_tasks = [3917, 3945] - +class SVMBenchmark(Benchmark): def __init__( self, task_id: Union[int, None] = None, @@ -30,50 +29,33 @@ def __init__( fidelity_choice: int = 1, benchmark_type: str = "raw" ): - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = check_random_state(self.seed) - super(SVMBenchmark, self).__init__(rng=seed) - - self.benchmark_type = benchmark_type - self.task_id = task_id - self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) - #TODO: check the cache_size parameter from sklearn docs + super(SVMBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) self.cache_size = 200 - # Data variables - self.train_X = None - self.valid_X = None - self.test_X = None - self.train_y = None - self.valid_y = None - self.test_y = None - self.train_idx = None - self.test_idx = None - self.task = None - self.dataset = None - self.preprocessor = None - self.lower_bound_train_size = None - self.load_data_from_openml() - - # Observation and fidelity spaces - self.fidelity_choice = fidelity_choice - self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) - self.x_cs = self.get_configuration_space(self.seed) - @staticmethod def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) + # cs.add_hyperparameters([ + # CS.UniformFloatHyperparameter( + # 'C', lower=-10., upper=10., default_value=0., log=False + # ), + # CS.UniformFloatHyperparameter( + # 'gamma', lower=-10., upper=10., default_value=1., log=False + # ), + # ]) + # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ CS.UniformFloatHyperparameter( - 'C', lower=-10., upper=10., default_value=0., log=False + "C", 0.03125, 32768, log=True, default_value=1.0 ), CS.UniformFloatHyperparameter( - 'gamma', lower=-10., upper=10., default_value=1., log=False - ), + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) ]) return cs @@ -99,273 +81,14 @@ def get_fidelity_space(seed=None, fidelity_choice=None): z_cs.add_hyperparameter(subsample) return z_cs - def get_config(self, size=None): - """Samples configuration(s) from the (hyper) parameter space - """ - if size is None: # return only one config - return self.x_cs.sample_configuration() - return [self.x_cs.sample_configuration() for i in range(size)] - - def get_fidelity(self, size=None): - """Samples candidate fidelities from the fidelity space - """ - if size is None: # return only one config - return self.z_cs.sample_configuration() - return [self.z_cs.sample_configuration() for i in range(size)] - - def _convert_labels(self, labels): - """Converts boolean labels (if exists) to strings - """ - label_types = list(map(lambda x: 
isinstance(x, bool), labels)) - if np.all(label_types): - _labels = list(map(lambda x: str(x), labels)) - if isinstance(labels, pd.Series): - labels = pd.Series(_labels, index=labels.index) - elif isinstance(labels, np.array): - labels = np.array(labels) - return labels - - def load_data_from_openml(self, valid_size=None, verbose=False): - """Fetches data from OpenML and initializes the train-validation-test data splits - - The validation set is fixed till this function is called again or explicitly altered - """ - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - - # loads full data - X, y, categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset - self.train_idx, self.test_idx = self.task.get_train_test_split_indices() - train_x = X.iloc[self.train_idx] - train_y = y.iloc[self.train_idx] - self.test_X = X.iloc[self.test_idx] - self.test_y = y.iloc[self.test_idx] - - # splitting training into training and validation - # validation set is fixed till this function is called again or explicitly altered - valid_size = self.valid_size if valid_size is None else valid_size - self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=self.rng - ) - - # preprocessor to handle missing values, categorical columns encodings, - # and scaling numeric columns - self.preprocessor = make_pipeline( - ColumnTransformer([ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), - OneHotEncoder(sparse=False, handle_unknown="ignore")), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), - StandardScaler()), - cont_idx.tolist(), - ) - ]) - ) - if verbose: - print("Shape of data pre-preprocessing: {}".format(train_X.shape)) - - # preprocessor fit only on the training set - self.train_X = self.preprocessor.fit_transform(self.train_X) - # applying preprocessor built on the training set, across validation and test splits - self.valid_X = self.preprocessor.transform(self.valid_X) - self.test_X = self.preprocessor.transform(self.test_X) - # converting boolean labels to strings - self.train_y = self._convert_labels(self.train_y) - self.valid_y = self._convert_labels(self.valid_y) - self.test_y = self._convert_labels(self.test_y) - - # Similar to (https://arxiv.org/pdf/1605.07079.pdf) - # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] - - if verbose: - print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") - - if verbose: - print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) - print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) - print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) - print("\nData loading complete!\n") - return - - def 
shuffle_data_idx(self, train_id=None, ng=None): - rng = self.rng if rng is None else rng - train_idx = self.train_idx if train_idx is None else train_idx - rng.shuffle(train_idx) - return train_idx - - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - + def init_model(self, config, fidelity=None, rng=None): # initializing model rng = self.rng if rng is None else rng config = config.get_dictionary() - for k, v in config.items(): - config[k] = np.exp(float(v)) model = SVC( **config, random_state=rng, cache_size=self.cache_size ) - - # preparing data - if eval == "valid": - train_X = self.train_X - train_y = self.train_y - train_idx = self.train_idx - else: - train_X = np.vstack((self.train_X, self.valid_X)) - train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) - - # shuffling data - if shuffle: - train_idx = self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] - train_y = train_y.iloc[train_idx] - - # subsample here - # application of the other fidelity to the dataset that the model interfaces - train_idx = self.rng.choice( - np.arange(len(train_X)), size=int( - fidelity['subsample'] * len(train_X) - ) - ) - # fitting the model with subsampled data - model.fit(train_X[train_idx], train_y.iloc[train_idx]) - # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss - - def objective( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - def objective_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), 
- 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - pass + return model From 61b6963ba0a7ed67a36e22941d64c01dd72f4d46 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 18:19:33 +0200 Subject: [PATCH 04/95] Adding placeholder for HistGradientBoostedClassifier --- hpobench/benchmarks/ml/histgb_benchmark.py | 125 +++++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 5 +- 2 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 hpobench/benchmarks/ml/histgb_benchmark.py diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py new file mode 100644 index 00000000..11e7af4a --- /dev/null +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -0,0 +1,125 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from copy import deepcopy +from typing import Union, Dict + +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer + +# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html +from sklearn.experimental import enable_hist_gradient_boosting # noqa +from sklearn.ensemble import HistGradientBoostingClassifier + +import hpobench.util.rng_helper as rng_helper +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark + + +class HistGBBenchmark(Benchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + super(HistGBBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) + pass + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformIntegerHyperparameter( + 'max_depth', lower=1, upper=15, default_value=2, log=False + ), + CS.UniformIntegerHyperparameter( + 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True + ), + CS.UniformFloatHyperparameter( + 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, 
log=False + ), + #TODO: find best way to encode l2 reg. since log params cannot have 0 as exact bound + # scales the regularization parameter by using it as a power of 10 + # such that the range of the parameter becomes {0, 1e-7, 1e-6, ..., 1e-1} + # where 10 ** 0 is enforced to be 0 (no regularization) + CS.UniformIntegerHyperparameter( + 'l2_regularization', lower=-7, upper=0, default_value=0, log=False + ) # value of 1 indicates 0 regularization + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity + """ + rng = self.rng if rng is None else rng + config = deepcopy(config).get_dictionary() + l2 = config.pop("l2_regularization") + l2 = 0 if l2 == 1 else 10 ** l2 + # TODO: decide on encoding of learning rate + #TODO: allow non-encoded categoricals? + #TODO: early stopping set to False? 
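+        # the chosen fidelity (n_estimators) is passed to the model as max_iter and
+        # early stopping is disabled, so the fidelity alone bounds the number of
+        # boosting iterations actually run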
+ model = HistGradientBoostingClassifier( + **config, + l2_regularization=l2, + max_iter=fidelity['n_estimators'], # a fidelity being used during initialization + early_stopping=False, + random_state=rng + ) + return model diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index be08b938..960b8271 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -16,7 +16,6 @@ from sklearn.metrics import accuracy_score, make_scorer import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark @@ -85,7 +84,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False ) else: # both n_estimators and subsample as fidelities @@ -93,7 +92,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'n_estimators', lower=2, upper=100, default_value=10, log=False ) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False ) z_cs.add_hyperparameters([ntrees, subsample]) return z_cs From a5d0217b258edff81b3b8082807c610e621ebbe6 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 18:21:34 +0200 Subject: [PATCH 05/95] Minor code cleaning --- hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 13076040..ec174748 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -16,7 +16,6 @@ from sklearn.metrics import accuracy_score, make_scorer import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark From 3def203e10a499a0e5fe2ab90643e5e28a739826 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Sat, 26 Jun 2021 17:44:58 +0200 Subject: [PATCH 06/95] Reformatting output dict + option to add more metrics --- hpobench/benchmarks/ml/histgb_benchmark.py | 3 +- .../benchmarks/ml/ml_benchmark_template.py | 47 ++++++++++++++++--- hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 11e7af4a..769838ae 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -50,8 +50,9 @@ def get_configuration_space(seed=None): CS.UniformIntegerHyperparameter( 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True ), + #TODO: fix lr value range error in map_to_config() CS.UniformFloatHyperparameter( - 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=False + 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=True ), #TODO: find best way to encode l2 reg. 
since log params cannot have 0 as exact bound # scales the regularization parameter by using it as a power of 10 diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 0891f0fe..2b95c097 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -12,12 +12,31 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, make_scorer +from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, \ + top_k_accuracy_score, balanced_accuracy_score import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +metrics = dict( + #TODO: decide on metrics generalized for different datasets + acc=accuracy_score, + bal_acc=balanced_accuracy_score, + f1=f1_score, + # roc=roc_auc_score, + # topk=top_k_accuracy_score +) +metrics_kwargs = dict( + #TODO: decide on metric parameters + acc=dict(), + bal_acc=dict(), + f1=dict(average="weighted"), + # roc=dict(average="weighted"), + # topk=dict() +) + + class Benchmark(AbstractBenchmark): _issue_tasks = [3917, 3945] @@ -36,7 +55,10 @@ def __init__( self.benchmark_type = benchmark_type self.task_id = task_id self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) + self.scorers = dict() + for k, v in metrics.items(): + self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) + # self.scorers = make_scorer(accuracy_score) # Data variables self.train_X = None @@ -231,7 +253,10 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): # fitting the model with subsampled data model.fit(train_X[train_idx], train_y.iloc[train_idx]) # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, train_X, train_y) + train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) model_fit_time = time.time() - start return model, model_fit_time, train_loss @@ -255,7 +280,10 @@ def objective( pass start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, self.valid_X, self.valid_y) + val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) eval_time = time.time() - start info = { @@ -264,6 +292,7 @@ def objective( 'cost': model_fit_time + eval_time, 'training_cost': model_fit_time, 'evaluation_cost': eval_time, + 'scores': scores, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -294,22 +323,26 @@ def objective_test( pass start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, self.test_X, self.test_y) + test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.test_X, self.test_y) eval_time = time.time() - start info = { 'train_loss': train_loss, - 'val_loss': val_loss, + 'test_loss': test_loss, 'cost': model_fit_time + eval_time, 'training_cost': model_fit_time, 'evaluation_cost': eval_time, + 'scores': scores, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': 
configuration.get_dictionary() } return { - 'function_value': info['val_loss'], + 'function_value': info['test_loss'], 'cost': info['cost'], 'info': info } diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index ec174748..62da5bbc 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -88,6 +88,5 @@ def init_model(self, config, fidelity=None, rng=None): **config, random_state=rng, cache_size=self.cache_size - ) return model From 750cc7d1138ba9dd6f91ba9023565a3720093f8d Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 28 Jun 2021 15:46:40 +0200 Subject: [PATCH 07/95] Removing redundant import --- hpobench/benchmarks/ml/histgb_benchmark.py | 1 - hpobench/benchmarks/ml/ml_benchmark_template.py | 1 - hpobench/benchmarks/ml/rf_benchmark.py | 1 - hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 4 files changed, 4 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 769838ae..0a0461a3 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -19,7 +19,6 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 2b95c097..55772ffc 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -15,7 +15,6 @@ from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, \ top_k_accuracy_score, balanced_accuracy_score -import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 960b8271..96e3f48c 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -15,7 +15,6 @@ from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 62da5bbc..2747f380 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -15,7 +15,6 @@ from sklearn.pipeline import make_pipeline, Pipeline from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark From e7665e68fdab26e2f88d927ae87810d961232372 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 30 Jun 2021 18:23:55 +0200 Subject: [PATCH 08/95] Decoupling storage of costs for each metric --- .../benchmarks/ml/ml_benchmark_template.py | 53 ++++++++++--------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 55772ffc..7692e447 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -221,8 +221,6 @@ def init_model(self, config, fidelity=None, rng=None): raise NotImplementedError() def _raw_objective(self, 
config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - # initializing model model = self.init_model(config, fidelity, rng) @@ -250,15 +248,18 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): ) ) # fitting the model with subsampled data + start = time.time() model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start # computing statistics on training data scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, train_X, train_y) + score_cost[k] = time.time() - _start train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss + return model, model_fit_time, train_loss, scores, score_cost def objective( self, @@ -271,27 +272,29 @@ def objective( """Function that evaluates a 'config' on a 'fidelity' on the validation set """ if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( configuration, fidelity, shuffle, rng ) else: #TODO: add cases for `tabular` and `surrogate` benchmarks - pass + pass + info['train_costs']['acc'] - start = time.time() scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, self.valid_X, self.valid_y) + score_cost[k] = time.time() - _start val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start info = { 'train_loss': train_loss, 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - 'scores': scores, + 'model_cost': model_fit_time, + 'train_scores': train_scores, + 'train_costs': train_score_cost, + 'eval_scores': scores, + 'eval_costs': score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -299,7 +302,7 @@ def objective( return { 'function_value': info['val_loss'], - 'cost': info['cost'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], 'info': info } @@ -314,27 +317,29 @@ def objective_test( """Function that evaluates a 'config' on a 'fidelity' on the test set """ if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( configuration, fidelity, shuffle, rng, eval="test" ) else: #TODO: add cases for `tabular` and `surrogate` benchmarks pass - start = time.time() scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, self.test_X, self.test_y) - test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start + score_cost[k] = time.time() - _start + test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) info = { 'train_loss': train_loss, - 'test_loss': test_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - 'scores': scores, + 'val_loss': test_loss, + 'model_cost': model_fit_time, + 'train_scores': train_scores, + 'train_costs': train_score_cost, + 'eval_scores': scores, + 'eval_costs': score_cost, # storing as dictionary and not ConfigSpace 
saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -342,7 +347,7 @@ def objective_test( return { 'function_value': info['test_loss'], - 'cost': info['cost'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], 'info': info } From 47fe4cdd6e466589449427cbe6a91a7da28479d0 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 1 Jul 2021 16:28:27 +0200 Subject: [PATCH 09/95] Including test scores in objective --- hpobench/benchmarks/ml/histgb_benchmark.py | 7 +- .../benchmarks/ml/ml_benchmark_template.py | 68 +++++++++---------- hpobench/benchmarks/ml/rf_benchmark.py | 7 +- hpobench/benchmarks/ml/svm_benchmark_2.py | 16 +---- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 0a0461a3..ac273c57 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -28,12 +28,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(HistGBBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) pass @staticmethod diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 7692e447..cc543b50 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -44,14 +44,12 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = check_random_state(self.seed) super(Benchmark, self).__init__(rng=seed) - self.benchmark_type = benchmark_type self.task_id = task_id self.valid_size = valid_size self.scorers = dict() @@ -258,7 +256,7 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): _start = time.time() scores[k] = v(model, train_X, train_y) score_cost[k] = time.time() - _start - train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) + train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost def objective( @@ -271,21 +269,24 @@ def objective( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( - configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass + info['train_costs']['acc'] + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + val_scores = dict() + val_score_cost = dict() + for k, v in self.scorers.items(): + _start = time.time() + val_scores[k] = v(model, self.valid_X, self.valid_y) + val_score_cost[k] = time.time() - _start + val_loss = 1 - val_scores["acc"] - scores = dict() - score_cost = dict() + test_scores = dict() + test_score_cost = dict() for k, v in self.scorers.items(): _start = time.time() - 
scores[k] = v(model, self.valid_X, self.valid_y) - score_cost[k] = time.time() - _start - val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) + test_scores[k] = v(model, self.test_X, self.test_y) + test_score_cost[k] = time.time() - _start + val_loss = 1 - test_scores["acc"] info = { 'train_loss': train_loss, @@ -293,8 +294,10 @@ def objective( 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'eval_scores': scores, - 'eval_costs': score_cost, + 'val_scores': val_scores, + 'val_costs': val_score_cost, + 'test_scores': test_scores, + 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -302,7 +305,7 @@ def objective( return { 'function_value': info['val_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['val_costs']['acc'], 'info': info } @@ -316,21 +319,16 @@ def objective_test( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - scores = dict() - score_cost = dict() + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + test_scores = dict() + test_score_cost = dict() for k, v in self.scorers.items(): _start = time.time() - scores[k] = v(model, self.test_X, self.test_y) - score_cost[k] = time.time() - _start - test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) + test_scores[k] = v(model, self.test_X, self.test_y) + test_score_cost[k] = time.time() - _start + test_loss = 1 - test_scores["acc"] info = { 'train_loss': train_loss, @@ -338,8 +336,8 @@ def objective_test( 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'eval_scores': scores, - 'eval_costs': score_cost, + 'test_scores': test_scores, + 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -347,7 +345,7 @@ def objective_test( return { 'function_value': info['test_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['test_costs']['acc'], 'info': info } diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 96e3f48c..7426a37a 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -24,12 +24,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(RandomForestBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(RandomForestBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) pass @staticmethod diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 
2747f380..12d22afa 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -24,12 +24,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(SVMBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) self.cache_size = 200 @staticmethod @@ -37,15 +34,6 @@ def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) - - # cs.add_hyperparameters([ - # CS.UniformFloatHyperparameter( - # 'C', lower=-10., upper=10., default_value=0., log=False - # ), - # CS.UniformFloatHyperparameter( - # 'gamma', lower=-10., upper=10., default_value=1., log=False - # ), - # ]) # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ CS.UniformFloatHyperparameter( From 2d085ecd2d3fd083a3168b6bd861c06bbd8bfd32 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 1 Jul 2021 16:45:43 +0200 Subject: [PATCH 10/95] Documenting the structure of information in each fn eval. --- hpobench/benchmarks/ml/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 hpobench/benchmarks/ml/README.md diff --git a/hpobench/benchmarks/ml/README.md b/hpobench/benchmarks/ml/README.md new file mode 100644 index 00000000..46ad4e08 --- /dev/null +++ b/hpobench/benchmarks/ml/README.md @@ -0,0 +1,29 @@ +Each function evalution returns a dictionary with the following information: + +``` +└───function_value: 1 - accuracy (acc.) on validation set +└───cost: time to fit model + time to evaluate acc. training set + time to evaluate acc. validation set +└───info: dictionary (dict) with miscellaneous information +| └───train_loss: 1 - accuracy (acc.) on training set +| └───val_loss: 1 - accuracy (acc.) on validation set +| └───model_cost: time taken to fit the model +| └───train_scores: performance on all metrics over the training set (dict) +| | └───f1: F1-score +| | └───acc: Accuracy +| | └───bal_acc: Balanced accuracy +| └───train_costs: time taken to compute performance on all metrics over the training set (dict) +| | └───f1: F1-score +| | └───acc: Accuracy +| | └───bal_acc: Balanced accuracy +| └───valid_scores: performance on all metrics over the validation set (dict) +| | └───... +| └───valid_costs: time taken to compute performance on all metrics over the validation set (dict) +| | └───... +| └───test_scores: performance on all metrics over the test set +| | └───... +| └───test_costs: time taken to compute performance on all metrics over the test set (dict) +| | └───... +``` + +*NOTE*: the keys `function_value`, `cost`, `info` need to exist when creating a new objective +function, while `info` can house any kind of auxilliary information required. 
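
As a rough sketch of how this dictionary is consumed (using the random forest benchmark from this series and the sampling helpers `get_config` / `get_fidelity` from the benchmark template; the task id and seed below are arbitrary placeholders):

```python
from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark

benchmark = RandomForestBenchmark(task_id=167149, seed=1, fidelity_choice=1)
config = benchmark.get_config()      # sample a hyperparameter configuration
fidelity = benchmark.get_fidelity()  # sample a fidelity (here: n_estimators)

result = benchmark.objective(config, fidelity)
print(result['function_value'])               # 1 - accuracy on the validation set
print(result['cost'])                         # model fit time + scoring time
print(result['info']['train_scores']['f1'])   # auxiliary metrics live under 'info'
```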
\ No newline at end of file From 2da9d5c02a2e413f4ab78a81bf68a6bc32495f4e Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 2 Jul 2021 17:04:26 +0200 Subject: [PATCH 11/95] Some decisions on lower bound for subsample fidelity --- hpobench/benchmarks/ml/ml_benchmark_template.py | 5 +++-- hpobench/benchmarks/ml/svm_benchmark_2.py | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index cc543b50..3ad61b54 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -194,8 +194,9 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + n_classes = len(self.task.class_labels) self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) if verbose: print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") @@ -332,7 +333,7 @@ def objective_test( info = { 'train_loss': train_loss, - 'val_loss': test_loss, + 'test_loss': test_loss, 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 12d22afa..845f40e0 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -45,8 +45,8 @@ def get_configuration_space(seed=None): ]) return cs - @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=None): + @classmethod + def get_fidelity_space(cls, seed=None, fidelity_choice=None): """Fidelity space available --- specifies the fidelity dimensions For SVM, only a single fidelity exists, i.e., subsample fraction. 
@@ -57,12 +57,14 @@ def get_fidelity_space(seed=None, fidelity_choice=None): """ z_cs = CS.ConfigurationSpace(seed=seed) - subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: subsample = CS.Constant('subsample', value=1) else: + # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True + lower = 0.1 subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=lower, upper=1, default_value=0.33, log=False ) z_cs.add_hyperparameter(subsample) return z_cs From 751d2e91658f4c7efc0acdc9676545162d870f84 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 6 Jul 2021 20:20:27 +0200 Subject: [PATCH 12/95] AbstractBenchmark update for fidelity option + including XGBoost --- README.md | 7 +- examples/local/xgboost_local.py | 2 +- hpobench/abstract_benchmark.py | 7 +- hpobench/benchmarks/ml/histgb_benchmark.py | 4 +- .../benchmarks/ml/ml_benchmark_template.py | 82 +-- hpobench/benchmarks/ml/rf_benchmark.py | 4 +- hpobench/benchmarks/ml/svm_benchmark.py | 391 +++----------- hpobench/benchmarks/ml/svm_benchmark_2.py | 81 --- hpobench/benchmarks/ml/svm_benchmark_old.py | 350 ++++++++++++ hpobench/benchmarks/ml/xgboost_benchmark.py | 511 ++++-------------- .../benchmarks/ml/xgboost_benchmark_old.py | 426 +++++++++++++++ tests/test_utils.py | 2 +- tests/test_whitebox.py | 2 +- 13 files changed, 1004 insertions(+), 865 deletions(-) delete mode 100644 hpobench/benchmarks/ml/svm_benchmark_2.py create mode 100644 hpobench/benchmarks/ml/svm_benchmark_old.py create mode 100644 hpobench/benchmarks/ml/xgboost_benchmark_old.py diff --git a/README.md b/README.md index 001eb1f4..998f2ad2 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,14 @@ Further requirements are: [ConfigSpace](https://github.com/automl/ConfigSpace), This can be arbitrarily complex and further information can be found in the docstring of the benchmark. 
A simple example is the XGBoost benchmark which can be installed with `pip install .[xgboost]` + ```python -from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark +from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark + b = XGBoostBenchmark(task_id=167149) config = b.get_configuration_space(seed=1).sample_configuration() -result_dict = b.objective_function(configuration=config, fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1) +result_dict = b.objective_function(configuration=config, + fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1) ``` diff --git a/examples/local/xgboost_local.py b/examples/local/xgboost_local.py index 47c1f77f..4f3b3ad3 100644 --- a/examples/local/xgboost_local.py +++ b/examples/local/xgboost_local.py @@ -10,7 +10,7 @@ import argparse from time import time -from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark +from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark from hpobench.util.openml_data_manager import get_openmlcc18_taskids diff --git a/hpobench/abstract_benchmark.py b/hpobench/abstract_benchmark.py index 5d7bc994..abbbcb22 100644 --- a/hpobench/abstract_benchmark.py +++ b/hpobench/abstract_benchmark.py @@ -226,12 +226,17 @@ def get_configuration_space(seed: Union[int, None] = None) -> ConfigSpace.Config @staticmethod @abc.abstractmethod - def get_fidelity_space(seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space( + seed: Union[int, None] = None, fidelity_choice: Union[int, None] = None + ) -> ConfigSpace.ConfigurationSpace: """ Defines the available fidelity parameters as a "fidelity space" for each benchmark. Parameters ---------- seed: int, None Seed for the fidelity space. 
+ fidelity_choice: int, None + integer value to choose the type of fidelity space + Returns ------- ConfigSpace.ConfigurationSpace diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index ac273c57..21ed4ec0 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -19,10 +19,10 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark -class HistGBBenchmark(Benchmark): +class HistGBBenchmark(MLBenchmark): def __init__( self, task_id: Union[int, None] = None, diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 3ad61b54..e0ab59bc 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -36,7 +36,7 @@ ) -class Benchmark(AbstractBenchmark): +class MLBenchmark(AbstractBenchmark): _issue_tasks = [3917, 3945] def __init__( @@ -48,7 +48,7 @@ def __init__( ): self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = check_random_state(self.seed) - super(Benchmark, self).__init__(rng=seed) + super(MLBenchmark, self).__init__(rng=seed) self.task_id = task_id self.valid_size = valid_size @@ -84,7 +84,7 @@ def get_configuration_space(seed=None): raise NotImplementedError() @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def get_fidelity_space(seed=None, fidelity_choice=None): """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -194,8 +194,8 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + self.n_classes = len(self.task.class_labels) + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) if verbose: @@ -219,7 +219,7 @@ def init_model(self, config, fidelity=None, rng=None): """ raise NotImplementedError() - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): # initializing model model = self.init_model(config, fidelity, rng) @@ -260,7 +260,7 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost - def objective( + def objective_function( self, configuration: Union[CS.Configuration, Dict], fidelity: Union[CS.Configuration, Dict, None] = None, @@ -270,7 +270,7 @@ def objective( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( configuration, fidelity, shuffle, rng ) val_scores = dict() @@ -310,7 +310,7 @@ def objective( 'info': info } - def objective_test( + def objective_function_test( self, configuration: Union[CS.Configuration, Dict], fidelity: Union[CS.Configuration, Dict, None] = None, @@ -320,7 
+320,7 @@ def objective_test( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( configuration, fidelity, shuffle, rng, eval="test" ) test_scores = dict() @@ -350,34 +350,40 @@ def objective_test( 'info': info } - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() + # # pylint: disable=arguments-differ + # @AbstractBenchmark.check_parameters + # def objective_function( + # self, + # configuration: Union[CS.Configuration, Dict], + # fidelity: Union[CS.Configuration, Dict, None] = None, + # shuffle: bool = False, + # rng: Union[np.random.RandomState, int, None] = None, + # **kwargs + # ) -> Dict: + # """Function that evaluates a 'config' on a 'fidelity' on the validation set + # """ + # return dict() + # + # # pylint: disable=arguments-differ + # @AbstractBenchmark.check_parameters + # def objective_function_test( + # self, + # configuration: Union[CS.Configuration, Dict], + # fidelity: Union[CS.Configuration, Dict, None] = None, + # shuffle: bool = False, + # rng: Union[np.random.RandomState, int, None] = None, + # **kwargs + # ) -> Dict: + # """Function that evaluates a 'config' on a 'fidelity' on the test set + # """ + # return dict() def get_meta_information(self): """ Returns the meta information for the benchmark """ - pass + return {'name': 'Support Vector Machine', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 7426a37a..b815e1bd 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -15,10 +15,10 @@ from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, make_scorer -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark -class RandomForestBenchmark(Benchmark): +class RandomForestBenchmark(MLBenchmark): def __init__( self, task_id: Union[int, None] = None, diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 0a765e45..1d0e2d00 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -1,350 +1,81 @@ -""" - -Changelog: -========== -0.0.2: -* Standardize the structure of the meta information - -0.0.1: -* First implementation - -""" - -import logging import time -from typing import Union, Tuple, Dict, 
List - -import ConfigSpace as CS +import openml import numpy as np -from scipy import sparse -from sklearn import pipeline -from sklearn import svm -from sklearn.compose import ColumnTransformer +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.svm import SVC from sklearn.impute import SimpleImputer +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline, Pipeline from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder, MinMaxScaler - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.2' - -logger = logging.getLogger('SVMBenchmark') - - -class SupportVectorMachine(AbstractBenchmark): - """ - Hyperparameter optimization task to optimize the regularization - parameter C and the kernel parameter gamma of a support vector machine. - Both hyperparameters are optimized on a log scale in [-10, 10]. - The X_test data set is only used for a final offline evaluation of - a configuration. For that the validation and training data is - concatenated to form the whole training data set. - """ - - def __init__(self, task_id: Union[int, None] = None, - rng: Union[np.random.RandomState, int, None] = None): - """ - Parameters - ---------- - task_id : int, None - rng : np.random.RandomState, int, None - """ - super(SupportVectorMachine, self).__init__(rng=rng) - - self.task_id = task_id - self.cache_size = 200 # Cache for the SVC in MB - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # Sort data (Categorical + numerical) so that categorical and continous are not mixed. - categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. 
""" - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM model - fidelity: Dict, None - Fidelity parameters for the SVM model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : training loss - fidelity : used fidelities in this evaluation - """ - start_time = time.time() - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - # Split of dataset subset - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_size = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) 
' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_size = fidelity['dataset_fraction'] - - train_size = int(train_size * len(self.train_idx)) - train_idx = self.train_idx[:train_size] - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - # Train support vector machine - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(self.x_train[train_idx], self.y_train[train_idx]) - - # Compute validation error - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - - cost = time.time() - start_time - - return {'function_value': float(val_loss), - "cost": cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity}} - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model with a given configuration on both the X_train - and validation data set and evaluates the model on the X_test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : X_test loss - cost : time to X_train and evaluate the model - info : Dict - train_valid_loss: Loss on the train+valid data set - fidelity : used fidelities in this evaluation - """ - assert np.isclose(fidelity['dataset_fraction'], 1), \ - f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start_time = time.time() - - # Concatenate training and validation dataset - if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): - data = sparse.vstack((self.x_train, self.x_valid)) - else: - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(data, targets) +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark - # Compute validation error - train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) - # Compute test error - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - - cost = time.time() - start_time - - return {'function_value': float(test_loss), - "cost": cost, - 'info': {'train_valid_loss': float(train_valid_loss), - 'fidelity': fidelity}} - - def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - - model = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), - ('svm', - svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) - ]) - return model +class SVMBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1 + ): + super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + self.cache_size = 200 @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the SVM Model - - For a detailed explanation of the hyperparameters: - https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters """ - - seed = seed if seed is not None else np.random.randint(1, 100000) cs = CS.ConfigurationSpace(seed=seed) - + # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), - CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), + 
CS.UniformFloatHyperparameter( + "C", 0.03125, 32768, log=True, default_value=1.0 + ), + CS.UniformFloatHyperparameter( + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) ]) - # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) return cs @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the SupportVector Benchmark - - Fidelities - ---------- - dataset_fraction: float - [0.1, 1] - fraction of training data set to use + def get_fidelity_space(seed=None, fidelity_choice=None): + """Fidelity space available --- specifies the fidelity dimensions - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace + For SVM, only a single fidelity exists, i.e., subsample fraction. + if fidelity_choice == 0 + uses the entire data (subsample=1), reflecting the black-box setup + else + parameterizes the fraction of data to subsample - Returns - ------- - ConfigSpace.ConfigurationSpace """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) + z_cs = CS.ConfigurationSpace(seed=seed) - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - ]) - return fidel_space - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - return {'name': 'Support Vector Machine', - 'references': ["@InProceedings{pmlr-v54-klein17a", - "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " - "Frank Hutter}, " - "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " - "Large Datasets}}" - "pages = {528--536}, year = {2017}," - "editor = {Aarti Singh and Jerry Zhu}," - "volume = {54}," - "series = {Proceedings of Machine Learning Research}," - "address = {Fort Lauderdale, FL, USA}," - "month = {20--22 Apr}," - "publisher = {PMLR}," - "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " - "url = {http://proceedings.mlr.press/v54/klein17a.html}, " - ], - 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } + if fidelity_choice == 0: + subsample = CS.Constant('subsample', value=1) + else: + # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True + lower = 0.1 + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=lower, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameter(subsample) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + # initializing model + rng = self.rng if rng is None else rng + config = config.get_dictionary() + model = SVC( + **config, + random_state=rng, + cache_size=self.cache_size + ) + return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py deleted file mode 100644 index 845f40e0..00000000 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ /dev/null @@ -1,81 +0,0 @@ -import time -import openml -import numpy as np -import pandas as pd -import ConfigSpace as CS -from typing import Union, Dict - -from sklearn.svm import SVC -from sklearn.impute import 
SimpleImputer -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline, Pipeline -from sklearn.metrics import accuracy_score, make_scorer - -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark - - -class SVMBenchmark(Benchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1 - ): - super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) - self.cache_size = 200 - - @staticmethod - def get_configuration_space(seed=None): - """Parameter space to be optimized --- contains the hyperparameters - """ - cs = CS.ConfigurationSpace(seed=seed) - # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter( - "C", 0.03125, 32768, log=True, default_value=1.0 - ), - CS.UniformFloatHyperparameter( - "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 - ) - ]) - return cs - - @classmethod - def get_fidelity_space(cls, seed=None, fidelity_choice=None): - """Fidelity space available --- specifies the fidelity dimensions - - For SVM, only a single fidelity exists, i.e., subsample fraction. - if fidelity_choice == 0 - uses the entire data (subsample=1), reflecting the black-box setup - else - parameterizes the fraction of data to subsample - - """ - z_cs = CS.ConfigurationSpace(seed=seed) - - if fidelity_choice == 0: - subsample = CS.Constant('subsample', value=1) - else: - # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True - lower = 0.1 - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=lower, upper=1, default_value=0.33, log=False - ) - z_cs.add_hyperparameter(subsample) - return z_cs - - def init_model(self, config, fidelity=None, rng=None): - # initializing model - rng = self.rng if rng is None else rng - config = config.get_dictionary() - model = SVC( - **config, - random_state=rng, - cache_size=self.cache_size - ) - return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_old.py b/hpobench/benchmarks/ml/svm_benchmark_old.py new file mode 100644 index 00000000..0a765e45 --- /dev/null +++ b/hpobench/benchmarks/ml/svm_benchmark_old.py @@ -0,0 +1,350 @@ +""" + +Changelog: +========== +0.0.2: +* Standardize the structure of the meta information + +0.0.1: +* First implementation + +""" + +import logging +import time +from typing import Union, Tuple, Dict, List + +import ConfigSpace as CS +import numpy as np +from scipy import sparse +from sklearn import pipeline +from sklearn import svm +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.metrics import accuracy_score, make_scorer +from sklearn.preprocessing import OneHotEncoder, MinMaxScaler + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager + +__version__ = '0.0.2' + +logger = logging.getLogger('SVMBenchmark') + + +class SupportVectorMachine(AbstractBenchmark): + """ + Hyperparameter optimization task to optimize the regularization + parameter C and the kernel parameter gamma of a support 
vector machine. + Both hyperparameters are optimized on a log scale in [-10, 10]. + The X_test data set is only used for a final offline evaluation of + a configuration. For that the validation and training data is + concatenated to form the whole training data set. + """ + + def __init__(self, task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None): + """ + Parameters + ---------- + task_id : int, None + rng : np.random.RandomState, int, None + """ + super(SupportVectorMachine, self).__init__(rng=rng) + + self.task_id = task_id + self.cache_size = 200 # Cache for the SVC in MB + self.accuracy_scorer = make_scorer(accuracy_score) + + self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ + self.get_data() + self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) + + # Sort data (Categorical + numerical) so that categorical and continous are not mixed. + categorical_idx = np.argwhere(self.categorical_data) + continuous_idx = np.argwhere(~self.categorical_data) + sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() + self.categorical_data = self.categorical_data[sorting] + self.x_train = self.x_train[:, sorting] + self.x_valid = self.x_valid[:, sorting] + self.x_test = self.x_test[:, sorting] + + nan_columns = np.all(np.isnan(self.x_train), axis=0) + self.categorical_data = self.categorical_data[~nan_columns] + self.x_train, self.x_valid, self.x_test, self.categories = \ + OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, + is_categorical=self.categorical_data) + + self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), + size=len(self.x_train), + replace=False) + + # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] + # (https://arxiv.org/pdf/1605.07079.pdf), + # use 10 time the number of classes as lower bound for the dataset fraction + n_classes = np.unique(self.y_train).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] + + def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: + """ Loads the data given a task or another source. """ + + assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' + 'overwrite the get_data method.') + + data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) + x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() + + return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types + + def shuffle_data(self, rng=None): + """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the + class-random-state""" + random_state = rng_helper.get_rng(rng, self.rng) + random_state.shuffle(self.train_idx) + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a SVM model given a hyperparameter configuration and + evaluates the model on the validation set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the SVM model + fidelity: Dict, None + Fidelity parameters for the SVM model, check get_fidelity_space(). 
Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : validation loss + cost : time to train and evaluate the model + info : Dict + train_loss : training loss + fidelity : used fidelities in this evaluation + """ + start_time = time.time() + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + # Split of dataset subset + if self.lower_bound_train_size > fidelity['dataset_fraction']: + train_size = self.lower_bound_train_size + logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' + f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' + f'{self.lower_bound_train_size:.8f}') + else: + train_size = fidelity['dataset_fraction'] + + train_size = int(train_size * len(self.train_idx)) + train_idx = self.train_idx[:train_size] + + # Transform hyperparameters to linear scale + hp_c = np.exp(float(configuration['C'])) + hp_gamma = np.exp(float(configuration['gamma'])) + + # Train support vector machine + model = self.get_pipeline(hp_c, hp_gamma) + model.fit(self.x_train[train_idx], self.y_train[train_idx]) + + # Compute validation error + train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) + val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) + + cost = time.time() - start_time + + return {'function_value': float(val_loss), + "cost": cost, + 'info': {'train_loss': float(train_loss), + 'fidelity': fidelity}} + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a SVM model with a given configuration on both the X_train + and validation data set and evaluates the model on the X_test data set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the SVM Model + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. 
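The fraction handling in the objective above can be summarised in isolation: the requested dataset_fraction is raised so that at least 10 * n_classes training samples are used before the subset is drawn, and C and gamma are searched on a linear [-10, 10] scale and only mapped back with np.exp at training time. A minimal sketch under those assumptions (the sample counts below are illustrative, not taken from any OpenML task):

import numpy as np

def effective_train_size(dataset_fraction, n_train, n_classes):
    # Lower bound used by the benchmark: at least 10 * n_classes training samples.
    lower_bound = (10 * n_classes) / n_train
    return int(max(dataset_fraction, lower_bound) * n_train)

print(effective_train_size(0.01, 2000, 10))   # -> 100, the request is clipped upwards
print(effective_train_size(0.50, 2000, 10))   # -> 1000, the request is used as given

# The configuration stores log-scale values; they are exponentiated before fitting.
config = {'C': 2.3, 'gamma': -4.0}
hp_c, hp_gamma = np.exp(float(config['C'])), np.exp(float(config['gamma']))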
+ kwargs + + Returns + ------- + Dict - + function_value : X_test loss + cost : time to X_train and evaluate the model + info : Dict + train_valid_loss: Loss on the train+valid data set + fidelity : used fidelities in this evaluation + """ + assert np.isclose(fidelity['dataset_fraction'], 1), \ + f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start_time = time.time() + + # Concatenate training and validation dataset + if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): + data = sparse.vstack((self.x_train, self.x_valid)) + else: + data = np.concatenate((self.x_train, self.x_valid)) + targets = np.concatenate((self.y_train, self.y_valid)) + + # Transform hyperparameters to linear scale + hp_c = np.exp(float(configuration['C'])) + hp_gamma = np.exp(float(configuration['gamma'])) + + model = self.get_pipeline(hp_c, hp_gamma) + model.fit(data, targets) + + # Compute validation error + train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) + + # Compute test error + test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) + + cost = time.time() - start_time + + return {'function_value': float(test_loss), + "cost": cost, + 'info': {'train_valid_loss': float(train_valid_loss), + 'fidelity': fidelity}} + + def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + + model = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), + ('svm', + svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) + ]) + return model + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all parameters for + the SVM Model + + For a detailed explanation of the hyperparameters: + https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), + CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), + ]) + # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) + return cs + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the SupportVector Benchmark + + Fidelities + ---------- + dataset_fraction: float - [0.1, 1] + fraction of training data set to use + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None 
else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), + ]) + return fidel_space + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + return {'name': 'Support Vector Machine', + 'references': ["@InProceedings{pmlr-v54-klein17a", + "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " + "Frank Hutter}, " + "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " + "Large Datasets}}" + "pages = {528--536}, year = {2017}," + "editor = {Aarti Singh and Jerry Zhu}," + "volume = {54}," + "series = {Proceedings of Machine Learning Research}," + "address = {Fort Lauderdale, FL, USA}," + "month = {20--22 Apr}," + "publisher = {PMLR}," + "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " + "url = {http://proceedings.mlr.press/v54/klein17a.html}, " + ], + 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index e43b3529..b038e4c9 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -1,426 +1,125 @@ -""" - -Changelog: -========== -0.0.2: -* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) - eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) - min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) - colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) - reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) - max_depth: - -> [1, 15] (def: 6) - subsample_per_it: - -> [0.01, 1] (def: 1) - [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) - - *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. - -* Increase the fidelity `n_estimators` - n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) - -* Add class to optimize also the used booster method: (gbtree, gblinear or dart) - We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more - about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). - - -0.0.1: -* First implementation of a XGBoost Benchmark. 
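Two encodings of log-scale search appear side by side here: the SVM space above keeps C and gamma linear in [-10, 10] and exponentiates them at training time, while the revised XGBoost space described in this changelog passes log=True so that ConfigSpace samples on a log scale directly. A small sketch of the difference, using an illustrative hyperparameter name (C_log) for the first style:

import numpy as np
import ConfigSpace as CS

cs = CS.ConfigurationSpace(seed=0)
cs.add_hyperparameters([
    # SVM style: linear range, exponentiated by the benchmark before fitting
    CS.UniformFloatHyperparameter('C_log', lower=-10., upper=10., default_value=0.),
    # revised XGBoost style: ConfigSpace itself samples on a log scale
    CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True),
])
config = cs.sample_configuration()
C = np.exp(config['C_log'])   # roughly 4.5e-5 ... 2.2e4 after the transform
eta = config['eta']           # already on the original scale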
- - -""" - -import logging import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS +import openml import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + import xgboost as xgb -from sklearn import pipeline -from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.2' - -logger = logging.getLogger('XGBBenchmark') - - -class XGBoostBenchmark(AbstractBenchmark): - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - """ - - Parameters - ---------- - task_id : int, None - n_threads : int, None - rng : np.random.RandomState, int, None - """ - - super(XGBoostBenchmark, self).__init__(rng=rng) - self.n_threads = n_threads - self.task_id = task_id - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. - categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - # Determine the number of categories in the labels. - # In case of binary classification ``self.num_class`` has to be 1 for xgboost. - self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) - self.num_class = 1 if self.num_class == 2 else self.num_class - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. 
Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost model - fidelity: Dict, None - Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : trainings loss - fidelity : used fidelities in this evaluation - """ - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_data_fraction = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_data_fraction = fidelity['dataset_fraction'] - - train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] - - model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) - model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) - - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - cost = time.time() - start - - return {'function_value': float(val_loss), - 'cost': cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity} - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model with a given configuration on both the train - and validation data set and evaluates the model on the test data set. 
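In other words, the test objective refits the model on the union of the training and validation splits and only then scores the held-out test split, and it is only defined at the maximal dataset_fraction. A schematic version, assuming any scikit-learn style classifier stands in for the pipeline built by this benchmark:

import numpy as np
from sklearn.metrics import accuracy_score

def test_objective(model, x_train, y_train, x_valid, y_valid, x_test, y_test):
    # Refit on train + valid, then report the loss on the untouched test split.
    data = np.concatenate((x_train, x_valid))
    targets = np.concatenate((y_train, y_valid))
    model.fit(data, targets)
    return 1.0 - accuracy_score(y_test, model.predict(x_test))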
- - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : test loss - cost : time to train and evaluate the model - info : Dict - fidelity : used fidelities in this evaluation - """ - default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value - if fidelity['dataset_fraction'] != default_dataset_fraction: - raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' - f'{default_dataset_fraction}') - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - # Impute potential nan values with the feature- - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) - model.fit(X=data, y=targets) +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - cost = time.time() - start - return {'function_value': float(test_loss), - 'cost': cost, - 'info': {'fidelity': fidelity}} +class XGBoostBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1 + ): + super(XGBoostBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + pass @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the XGBoost Model - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), - CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), - CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), - CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) 
+ CS.UniformFloatHyperparameter( + 'eta', lower=2**-10, upper=1., default_value=0.3, log=True + ), # learning rate + CS.UniformIntegerHyperparameter( + 'max_depth', lower=1, upper=15, default_value=6, log=False + ), + CS.UniformFloatHyperparameter( + 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True + ), + CS.UniformFloatHyperparameter( + 'colsample_bytree', lower=0.01, upper=1., default_value=1. + ), + # CS.UniformFloatHyperparameter( + # 'colsample_bylevel', lower=0.01, upper=1., default_value=1. + # ), + CS.UniformFloatHyperparameter( + 'reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True + ), + # CS.UniformFloatHyperparameter( + # 'reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True + # ), + # CS.UniformFloatHyperparameter( + # 'subsample_per_it', lower=0.1, upper=1, default_value=1, log=False + # ) ]) - return cs @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the XGBoost Benchmark - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) - ]) - - return fidel_space - - def get_meta_information(self) -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 
'XGBoost', - 'references': ['@article{probst2019tunability,' - 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' - 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' - 'journal={J. Mach. Learn. Res.},' - 'volume={20},' - 'number={53},' - 'pages={1--32},' - 'year={2019}' - '}'], - 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' - 'xgboost_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } - - def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, - colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, - n_estimators: int, subsample_per_it: float) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier( - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it)) - ]) - return clf - - -class XGBoostExtendedBenchmark(XGBoostBenchmark): - """ - Similar to XGBoostBenchmark but enables also the optimization of the used booster. - """ - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = XGBoostBenchmark.get_configuration_space(seed) - hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], - default_value='gbtree') - cs.add_hyperparameter(hp_booster) - - # XGBoost with 'gblinear' can not use some - # parameters. Exclude them from the configuration space by introducing a condition. - hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] - - # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
- conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] - cs.add_conditions(conditions) - return cs - - # noinspection PyMethodOverriding - # pylint: disable=arguments-differ - def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, - min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, - colsample_bylevel: float = None, subsample_per_it: float = None) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - configuration = dict(booster=booster, - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it) - - configuration = {k: v for k, v in configuration.items() if v is not None} - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier(**configuration)) - ]) - return clf + rng = rng if (rng is None and isinstance(rng, int)) else self.seed + extra_args = dict( + n_estimators=fidelity['n_estimators'], + objective="binary:logistic", + random_state=rng, + subsample=1 + ) + if self.n_classes > 2: + extra_args["objective"] = "multi:softmax" + extra_args.update({"num_class": self.n_classes}) + model = xgb.XGBClassifier( + **config.get_dictionary(), + **extra_args + ) + return model diff --git a/hpobench/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/benchmarks/ml/xgboost_benchmark_old.py new file mode 100644 index 00000000..fb380c89 --- /dev/null +++ b/hpobench/benchmarks/ml/xgboost_benchmark_old.py @@ -0,0 +1,426 @@ +""" + +Changelog: +========== +0.0.2: +* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) + eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) + min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) + colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) + colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) + reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) + reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) + max_depth: - -> [1, 15] (def: 6) + subsample_per_it: - -> [0.01, 1] (def: 1) + [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) + + *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. + +* Increase the fidelity `n_estimators` + n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) + +* Add class to optimize also the used booster method: (gbtree, gblinear or dart) + We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more + about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). + + +0.0.1: +* First implementation of a XGBoost Benchmark. 
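The changelog entry on the booster above refers to a conditional search space: the booster becomes a categorical hyperparameter, and tree-specific parameters are deactivated when 'gblinear' is selected. A minimal sketch of that mechanism (only max_depth is conditioned here; the extended benchmark conditions several more parameters):

import ConfigSpace as CS

cs = CS.ConfigurationSpace(seed=0)
booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'],
                                       default_value='gbtree')
max_depth = CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6)
cs.add_hyperparameters([booster, max_depth])
# "Make max_depth active only if booster is not equal to gblinear."
cs.add_condition(CS.NotEqualsCondition(max_depth, booster, 'gblinear'))
print(cs.sample_configuration())  # max_depth is absent whenever booster == 'gblinear'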
+ + +""" + +import logging +import time +from typing import Union, Tuple, Dict, List + +import ConfigSpace as CS +import numpy as np +import xgboost as xgb +from sklearn import pipeline +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.metrics import accuracy_score, make_scorer +from sklearn.preprocessing import OneHotEncoder + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager + +__version__ = '0.0.2' + +logger = logging.getLogger('XGBBenchmark') + + +class XGBoostBenchmark(AbstractBenchmark): + + def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, + rng: Union[np.random.RandomState, int, None] = None): + """ + + Parameters + ---------- + task_id : int, None + n_threads : int, None + rng : np.random.RandomState, int, None + """ + + super(XGBoostBenchmark, self).__init__(rng=rng) + self.n_threads = n_threads + self.task_id = task_id + self.accuracy_scorer = make_scorer(accuracy_score) + + self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ + self.get_data() + self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) + + # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. + categorical_idx = np.argwhere(self.categorical_data) + continuous_idx = np.argwhere(~self.categorical_data) + sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() + self.categorical_data = self.categorical_data[sorting] + self.x_train = self.x_train[:, sorting] + self.x_valid = self.x_valid[:, sorting] + self.x_test = self.x_test[:, sorting] + + nan_columns = np.all(np.isnan(self.x_train), axis=0) + self.categorical_data = self.categorical_data[~nan_columns] + + self.x_train, self.x_valid, self.x_test, self.categories = \ + OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, + is_categorical=self.categorical_data) + + # Determine the number of categories in the labels. + # In case of binary classification ``self.num_class`` has to be 1 for xgboost. + self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) + self.num_class = 1 if self.num_class == 2 else self.num_class + + self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), + size=len(self.x_train), + replace=False) + + # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] + # (https://arxiv.org/pdf/1605.07079.pdf), + # use 10 time the number of classes as lower bound for the dataset fraction + n_classes = np.unique(self.y_train).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] + + def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: + """ Loads the data given a task or another source. """ + + assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' + 'overwrite the get_data method.') + + data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) + x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() + + return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types + + def shuffle_data(self, rng=None): + """ Reshuffle the training data. 
If 'rng' is None, the training idx are shuffled according to the + class-random-state""" + random_state = rng_helper.get_rng(rng, self.rng) + random_state.shuffle(self.train_idx) + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a XGBoost model given a hyperparameter configuration and + evaluates the model on the validation set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the XGBoost model + fidelity: Dict, None + Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : validation loss + cost : time to train and evaluate the model + info : Dict + train_loss : trainings loss + fidelity : used fidelities in this evaluation + """ + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start = time.time() + + if self.lower_bound_train_size > fidelity['dataset_fraction']: + train_data_fraction = self.lower_bound_train_size + logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' + f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' + f'{self.lower_bound_train_size:.8f}') + else: + train_data_fraction = fidelity['dataset_fraction'] + + train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] + + model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) + model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) + + train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) + val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) + cost = time.time() - start + + return {'function_value': float(val_loss), + 'cost': cost, + 'info': {'train_loss': float(train_loss), + 'fidelity': fidelity} + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a XGBoost model with a given configuration on both the train + and validation data set and evaluates the model on the test data set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the XGBoost Model + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. 
By default the class level random seed. + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : test loss + cost : time to train and evaluate the model + info : Dict + fidelity : used fidelities in this evaluation + """ + default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value + if fidelity['dataset_fraction'] != default_dataset_fraction: + raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' + f'{default_dataset_fraction}') + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start = time.time() + + # Impute potential nan values with the feature- + data = np.concatenate((self.x_train, self.x_valid)) + targets = np.concatenate((self.y_train, self.y_valid)) + + model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) + model.fit(X=data, y=targets) + + test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) + cost = time.time() - start + + return {'function_value': float(test_loss), + 'cost': cost, + 'info': {'fidelity': fidelity}} + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all parameters for + the XGBoost Model + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), + CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), + CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), + CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), + CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), + CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), + CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), + CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) + ]) + + return cs + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the XGBoost Benchmark + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), + CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) + ]) + + return fidel_space + + def get_meta_information(self) -> Dict: + """ Returns the meta information for the benchmark """ + return {'name': 'XGBoost', + 'references': 
['@article{probst2019tunability,' + 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' + 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' + 'journal={J. Mach. Learn. Res.},' + 'volume={20},' + 'number={53},' + 'pages={1--32},' + 'year={2019}' + '}'], + 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' + 'xgboost_benchmark_old.py', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } + + def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, + colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, + n_estimators: int, subsample_per_it: float) \ + -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' + + clf = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", "passthrough", ~self.categorical_data)])), + ('xgb', + xgb.XGBClassifier( + max_depth=max_depth, + learning_rate=eta, + min_child_weight=min_child_weight, + colsample_bytree=colsample_bytree, + colsample_bylevel=colsample_bylevel, + reg_alpha=reg_alpha, + reg_lambda=reg_lambda, + n_estimators=n_estimators, + objective=objective, + n_jobs=self.n_threads, + random_state=self.rng.randint(1, 100000), + num_class=self.num_class, + subsample=subsample_per_it)) + ]) + return clf + + +class XGBoostExtendedBenchmark(XGBoostBenchmark): + """ + Similar to XGBoostBenchmark but enables also the optimization of the used booster. + """ + + def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, + rng: Union[np.random.RandomState, int, None] = None): + super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + cs = XGBoostBenchmark.get_configuration_space(seed) + hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], + default_value='gbtree') + cs.add_hyperparameter(hp_booster) + + # XGBoost with 'gblinear' can not use some + # parameters. Exclude them from the configuration space by introducing a condition. + hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] + + # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
+ conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] + cs.add_conditions(conditions) + return cs + + # noinspection PyMethodOverriding + # pylint: disable=arguments-differ + def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, + min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, + colsample_bylevel: float = None, subsample_per_it: float = None) \ + -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' + + configuration = dict(booster=booster, + max_depth=max_depth, + learning_rate=eta, + min_child_weight=min_child_weight, + colsample_bytree=colsample_bytree, + colsample_bylevel=colsample_bylevel, + reg_alpha=reg_alpha, + reg_lambda=reg_lambda, + n_estimators=n_estimators, + objective=objective, + n_jobs=self.n_threads, + random_state=self.rng.randint(1, 100000), + num_class=self.num_class, + subsample=subsample_per_it) + + configuration = {k: v for k, v in configuration.items() if v is not None} + + clf = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", "passthrough", ~self.categorical_data)])), + ('xgb', + xgb.XGBClassifier(**configuration)) + ]) + return clf diff --git a/tests/test_utils.py b/tests/test_utils.py index 885ce606..9bc5ff3b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -64,7 +64,7 @@ def test_rng_serialization(): def test_rng_serialization_xgb(): import json from hpobench.util.container_utils import BenchmarkEncoder, BenchmarkDecoder - from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark + from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark b = XGBoostBenchmark(task_id=167149, rng=0) meta = b.get_meta_information() diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index 7e4c32aa..c3f5e0ff 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -14,7 +14,7 @@ def test_whitebox_without_container_xgb(): - from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark + from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark b = Benchmark(task_id=167199, rng=0) cs = b.get_configuration_space(seed=0) From 3f84afbe466b83910d95f0b074ccb0d1046f35ed Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 23 Jun 2021 20:32:38 +0200 Subject: [PATCH 13/95] Adding sample RF space for tabular collection design --- hpobench/benchmarks/ml/rf_benchmark.py | 391 +++++++++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 hpobench/benchmarks/ml/rf_benchmark.py diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py new file mode 100644 index 00000000..35684c00 --- /dev/null +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -0,0 +1,391 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import 
OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark + + +class RandomForestBenchmark(AbstractBenchmark): + _issue_tasks = [3917, 3945] + + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = check_random_state(self.seed) + super(RandomForestBenchmark, self).__init__(rng=seed) + + self.benchmark_type = benchmark_type + self.task_id = task_id + self.valid_size = valid_size + self.accuracy_scorer = make_scorer(accuracy_score) + + # Data variables + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.load_data_from_openml() + + # Observation and fidelity spaces + self.fidelity_choice = fidelity_choice + self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) + self.x_cs = self.get_configuration_space(self.seed) + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformIntegerHyperparameter( + 'max_depth', lower=1, upper=15, default_value=2, log=False + ), + CS.UniformIntegerHyperparameter( + 'min_samples_split', lower=2, upper=128, default_value=2, log=True + ), + CS.UniformFloatHyperparameter( + 'max_features', lower=0.1, upper=0.9, default_value=0.5, log=False + ), + CS.UniformIntegerHyperparameter( + 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True + ), + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = 
CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed till this function is called again or explicitly altered + valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + train_x, train_y, test_size=valid_size, + shuffle=True, stratify=train_y, random_state=self.rng + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + + # preprocessor fit only on the training set + self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = 
self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 times the number of classes as lower bound for the dataset fraction + n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + + if verbose: + print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") + + if verbose: + print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) + print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) + print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) + print("\nData loading complete!\n") + return + + def shuffle_data_idx(self, train_id=None, ng=None): + rng = self.rng if rng is None else rng + train_idx = self.train_idx if train_idx is None else train_idx + rng.shuffle(train_idx) + return train_idx + + def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + start = time.time() + + # initializing model + model = RandomForestClassifier( + **config.get_dictionary(), + n_estimators=fidelity['n_estimators'], # a fidelity being used during initialization + bootstrap=True, + random_state=self.rng + ) + + # preparing data + if eval == "valid": + train_X = self.train_X + train_y = self.train_y + train_idx = self.train_idx + else: + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + train_idx = np.arange(len(train_X)) + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here + # application of the other fidelity to the dataset that the model interfaces + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + fidelity['subsample'] * len(train_X) + ) + ) + # fitting the model with subsampled data + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + # computing statistics on training data + train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + + model_fit_time = time.time() - start + return model, model_fit_time, train_loss + + def objective( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + def objective_test( + self, + configuration: 
Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass From 09b296a4a1ef18e0a11b0e0dfa25ebb6345c6427 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 23 Jun 2021 20:57:37 +0200 Subject: [PATCH 14/95] Placeholder SVM benchmark to interface tabular data collection --- hpobench/benchmarks/ml/svm_benchmark_2.py | 371 ++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 hpobench/benchmarks/ml/svm_benchmark_2.py diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py new file mode 100644 index 00000000..6e8ec6c9 --- /dev/null +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -0,0 +1,371 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.svm import SVC +from sklearn.impute import SimpleImputer +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline, Pipeline +from sklearn.metrics import accuracy_score, make_scorer + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark + + +class SVMBenchmark(AbstractBenchmark): + _issue_tasks = [3917, 3945] + + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = 
"raw" + ): + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = check_random_state(self.seed) + super(SVMBenchmark, self).__init__(rng=seed) + + self.benchmark_type = benchmark_type + self.task_id = task_id + self.valid_size = valid_size + self.accuracy_scorer = make_scorer(accuracy_score) + #TODO: check the cache_size parameter from sklearn docs + self.cache_size = 200 + + # Data variables + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.load_data_from_openml() + + # Observation and fidelity spaces + self.fidelity_choice = fidelity_choice + self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) + self.x_cs = self.get_configuration_space(self.seed) + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter( + 'C', lower=-10., upper=10., default_value=0., log=False + ), + CS.UniformFloatHyperparameter( + 'gamma', lower=-10., upper=10., default_value=1., log=False + ), + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=None): + """Fidelity space available --- specifies the fidelity dimensions + + For SVM, only a single fidelity exists, i.e., subsample fraction. + if fidelity_choice == 0 + uses the entire data (subsample=1), reflecting the black-box setup + else + parameterizes the fraction of data to subsample + + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + subsample = CS.Constant('subsample', value=1) + else: + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameter(subsample) + return z_cs + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # 
loads full data
+        X, y, categorical_ind, feature_names = self.dataset.get_data(
+            target=self.task.target_name, dataset_format="dataframe"
+        )
+        categorical_ind = np.array(categorical_ind)
+        (cat_idx,) = np.where(categorical_ind)
+        (cont_idx,) = np.where(~categorical_ind)
+
+        # splitting dataset into train and test (10% test)
+        # train-test split is fixed for a task and its associated dataset
+        self.train_idx, self.test_idx = self.task.get_train_test_split_indices()
+        train_x = X.iloc[self.train_idx]
+        train_y = y.iloc[self.train_idx]
+        self.test_X = X.iloc[self.test_idx]
+        self.test_y = y.iloc[self.test_idx]
+
+        # splitting training into training and validation
+        # validation set is fixed till this function is called again or explicitly altered
+        valid_size = self.valid_size if valid_size is None else valid_size
+        self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split(
+            train_x, train_y, test_size=valid_size,
+            shuffle=True, stratify=train_y, random_state=self.rng
+        )
+
+        # preprocessor to handle missing values, categorical columns encodings,
+        # and scaling numeric columns
+        self.preprocessor = make_pipeline(
+            ColumnTransformer([
+                (
+                    "cat",
+                    make_pipeline(SimpleImputer(strategy="most_frequent"),
+                                  OneHotEncoder(sparse=False, handle_unknown="ignore")),
+                    cat_idx.tolist(),
+                ),
+                (
+                    "cont",
+                    make_pipeline(SimpleImputer(strategy="median"),
+                                  StandardScaler()),
+                    cont_idx.tolist(),
+                )
+            ])
+        )
+        if verbose:
+            print("Shape of data pre-preprocessing: {}".format(self.train_X.shape))
+
+        # preprocessor fit only on the training set
+        self.train_X = self.preprocessor.fit_transform(self.train_X)
+        # applying preprocessor built on the training set, across validation and test splits
+        self.valid_X = self.preprocessor.transform(self.valid_X)
+        self.test_X = self.preprocessor.transform(self.test_X)
+        # converting boolean labels to strings
+        self.train_y = self._convert_labels(self.train_y)
+        self.valid_y = self._convert_labels(self.valid_y)
+        self.test_y = self._convert_labels(self.test_y)
+
+        # Similar to (https://arxiv.org/pdf/1605.07079.pdf)
+        # use 10 times the number of classes as lower bound for the dataset fraction
+        n_classes = len(self.task.class_labels)  # np.unique(self.train_y).shape[0]
+        self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0]
+
+        if verbose:
+            print("Shape of data post-preprocessing: {}".format(self.train_X.shape), "\n")
+
+        if verbose:
+            print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape))
+            print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape))
+            print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape))
+            print("\nData loading complete!\n")
+        return
+
+    def shuffle_data_idx(self, train_idx=None, rng=None):
+        rng = self.rng if rng is None else rng
+        train_idx = self.train_idx if train_idx is None else train_idx
+        rng.shuffle(train_idx)
+        return train_idx
+
+    def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"):
+        start = time.time()
+
+        # initializing model
+        rng = self.rng if rng is None else rng
+        config = config.get_dictionary()
+        for k, v in config.items():
+            config[k] = np.exp(float(v))
+        model = SVC(
+            **config,
+            random_state=rng,
+            cache_size=self.cache_size
+        )
+
+        # preparing data
+        if eval == "valid":
+            train_X = self.train_X
+            train_y = self.train_y
+            train_idx = self.train_idx
+        else:
+            train_X = np.vstack((self.train_X, self.valid_X))
+            train_y = pd.concat((self.train_y, self.valid_y))
+            train_idx =
np.arange(len(train_X)) + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here + # application of the other fidelity to the dataset that the model interfaces + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + fidelity['subsample'] * len(train_X) + ) + ) + # fitting the model with subsampled data + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + # computing statistics on training data + train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + + model_fit_time = time.time() - start + return model, model_fit_time, train_loss + + def objective( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + def objective_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + if self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: 
Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass From af4f593c835764268dd7f07344ac5287eeb9d891 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 14:38:56 +0200 Subject: [PATCH 15/95] Writing common ML benchmark class for tabular collection --- .../benchmarks/ml/ml_benchmark_template.py | 347 ++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 300 +-------------- hpobench/benchmarks/ml/svm_benchmark_2.py | 315 +--------------- 3 files changed, 376 insertions(+), 586 deletions(-) create mode 100644 hpobench/benchmarks/ml/ml_benchmark_template.py diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py new file mode 100644 index 00000000..0891f0fe --- /dev/null +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -0,0 +1,347 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark + + +class Benchmark(AbstractBenchmark): + _issue_tasks = [3917, 3945] + + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = check_random_state(self.seed) + super(Benchmark, self).__init__(rng=seed) + + self.benchmark_type = benchmark_type + self.task_id = task_id + self.valid_size = valid_size + self.accuracy_scorer = make_scorer(accuracy_score) + + # Data variables + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.load_data_from_openml() + + # Observation and fidelity spaces + self.fidelity_choice = fidelity_choice + self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) + self.x_cs = self.get_configuration_space(self.seed) + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + raise NotImplementedError() + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + raise 
NotImplementedError() + + def get_config(self, size=None): + """Samples configuration(s) from the (hyper) parameter space + """ + if size is None: # return only one config + return self.x_cs.sample_configuration() + return [self.x_cs.sample_configuration() for i in range(size)] + + def get_fidelity(self, size=None): + """Samples candidate fidelities from the fidelity space + """ + if size is None: # return only one config + return self.z_cs.sample_configuration() + return [self.z_cs.sample_configuration() for i in range(size)] + + def _convert_labels(self, labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels + + def load_data_from_openml(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed till this function is called again or explicitly altered + valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + train_x, train_y, test_size=valid_size, + shuffle=True, stratify=train_y, random_state=self.rng + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + + # preprocessor fit only on the training set + self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 
times the number of classes as lower bound for the dataset fraction
+        n_classes = len(self.task.class_labels)  # np.unique(self.train_y).shape[0]
+        self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0]
+
+        if verbose:
+            print("Shape of data post-preprocessing: {}".format(self.train_X.shape), "\n")
+
+        if verbose:
+            print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape))
+            print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape))
+            print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape))
+            print("\nData loading complete!\n")
+        return
+
+    def shuffle_data_idx(self, train_idx=None, rng=None):
+        rng = self.rng if rng is None else rng
+        train_idx = self.train_idx if train_idx is None else train_idx
+        rng.shuffle(train_idx)
+        return train_idx
+
+    def init_model(self, config, fidelity=None, rng=None):
+        """ Function that returns the model initialized based on the configuration and fidelity
+        """
+        raise NotImplementedError()
+
+    def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"):
+        start = time.time()
+
+        # initializing model
+        model = self.init_model(config, fidelity, rng)
+
+        # preparing data
+        if eval == "valid":
+            train_X = self.train_X
+            train_y = self.train_y
+            train_idx = self.train_idx
+        else:
+            train_X = np.vstack((self.train_X, self.valid_X))
+            train_y = pd.concat((self.train_y, self.valid_y))
+            train_idx = np.arange(len(train_X))
+
+        # shuffling data
+        if shuffle:
+            train_idx = self.shuffle_data_idx(train_idx, rng)
+            train_X = train_X[train_idx]
+            train_y = train_y.iloc[train_idx]
+
+        # subsample here:
+        # application of the other fidelity to the dataset that the model interfaces
+        train_idx = self.rng.choice(
+            np.arange(len(train_X)), size=int(
+                fidelity['subsample'] * len(train_X)
+            )
+        )
+        # fitting the model with subsampled data
+        model.fit(train_X[train_idx], train_y.iloc[train_idx])
+        # computing statistics on training data
+        train_loss = 1 - self.accuracy_scorer(model, train_X, train_y)
+
+        model_fit_time = time.time() - start
+        return model, model_fit_time, train_loss
+
+    def objective(
+        self,
+        configuration: Union[CS.Configuration, Dict],
+        fidelity: Union[CS.Configuration, Dict, None] = None,
+        shuffle: bool = False,
+        rng: Union[np.random.RandomState, int, None] = None,
+        **kwargs
+    ) -> Dict:
+        """Function that evaluates a 'config' on a 'fidelity' on the validation set
+        """
+        if self.benchmark_type == "raw":
+            model, model_fit_time, train_loss = self._raw_objective(
+                configuration, fidelity, shuffle, rng
+            )
+        else:
+            #TODO: add cases for `tabular` and `surrogate` benchmarks
+            raise NotImplementedError()
+
+        start = time.time()
+        val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y)
+        eval_time = time.time() - start
+
+        info = {
+            'train_loss': train_loss,
+            'val_loss': val_loss,
+            'cost': model_fit_time + eval_time,
+            'training_cost': model_fit_time,
+            'evaluation_cost': eval_time,
+            # storing as dictionary and not ConfigSpace saves tremendous memory
+            'fidelity': fidelity.get_dictionary(),
+            'config': configuration.get_dictionary()
+        }
+
+        return {
+            'function_value': info['val_loss'],
+            'cost': info['cost'],
+            'info': info
+        }
+
+    def objective_test(
+        self,
+        configuration: Union[CS.Configuration, Dict],
+        fidelity: Union[CS.Configuration, Dict, None] = None,
+        shuffle: bool = False,
+        rng: Union[np.random.RandomState, int, None] = None,
+        **kwargs
+    ) -> Dict:
+        """Function that evaluates a 'config' on a 'fidelity' on the test set
+        """
+        if
self.benchmark_type == "raw": + model, model_fit_time, train_loss = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + else: + #TODO: add cases for `tabular` and `surrogate` benchmarks + pass + + start = time.time() + val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + eval_time = time.time() - start + + info = { + 'train_loss': train_loss, + 'val_loss': val_loss, + 'cost': model_fit_time + eval_time, + 'training_cost': model_fit_time, + 'evaluation_cost': eval_time, + # storing as dictionary and not ConfigSpace saves tremendous memory + 'fidelity': fidelity.get_dictionary(), + 'config': configuration.get_dictionary() + } + + return { + 'function_value': info['val_loss'], + 'cost': info['cost'], + 'info': info + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the validation set + """ + return dict() + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs + ) -> Dict: + """Function that evaluates a 'config' on a 'fidelity' on the test set + """ + return dict() + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + pass diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 35684c00..be08b938 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -17,11 +17,10 @@ import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark -class RandomForestBenchmark(AbstractBenchmark): - _issue_tasks = [3917, 3945] - +class RandomForestBenchmark(Benchmark): def __init__( self, task_id: Union[int, None] = None, @@ -30,34 +29,10 @@ def __init__( fidelity_choice: int = 1, benchmark_type: str = "raw" ): - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = check_random_state(self.seed) - super(RandomForestBenchmark, self).__init__(rng=seed) - - self.benchmark_type = benchmark_type - self.task_id = task_id - self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) - - # Data variables - self.train_X = None - self.valid_X = None - self.test_X = None - self.train_y = None - self.valid_y = None - self.test_y = None - self.train_idx = None - self.test_idx = None - self.task = None - self.dataset = None - self.preprocessor = None - self.lower_bound_train_size = None - self.load_data_from_openml() - - # Observation and fidelity spaces - self.fidelity_choice = fidelity_choice - self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) - self.x_cs = self.get_configuration_space(self.seed) + super(RandomForestBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) + pass @staticmethod def get_configuration_space(seed=None): @@ -123,269 +98,14 @@ def get_fidelity_space(seed=None, fidelity_choice=1): z_cs.add_hyperparameters([ntrees, subsample]) return z_cs - 
def get_config(self, size=None): - """Samples configuration(s) from the (hyper) parameter space - """ - if size is None: # return only one config - return self.x_cs.sample_configuration() - return [self.x_cs.sample_configuration() for i in range(size)] - - def get_fidelity(self, size=None): - """Samples candidate fidelities from the fidelity space + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity """ - if size is None: # return only one config - return self.z_cs.sample_configuration() - return [self.z_cs.sample_configuration() for i in range(size)] - - def _convert_labels(self, labels): - """Converts boolean labels (if exists) to strings - """ - label_types = list(map(lambda x: isinstance(x, bool), labels)) - if np.all(label_types): - _labels = list(map(lambda x: str(x), labels)) - if isinstance(labels, pd.Series): - labels = pd.Series(_labels, index=labels.index) - elif isinstance(labels, np.array): - labels = np.array(labels) - return labels - - def load_data_from_openml(self, valid_size=None, verbose=False): - """Fetches data from OpenML and initializes the train-validation-test data splits - - The validation set is fixed till this function is called again or explicitly altered - """ - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - - # loads full data - X, y, categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset - self.train_idx, self.test_idx = self.task.get_train_test_split_indices() - train_x = X.iloc[self.train_idx] - train_y = y.iloc[self.train_idx] - self.test_X = X.iloc[self.test_idx] - self.test_y = y.iloc[self.test_idx] - - # splitting training into training and validation - # validation set is fixed till this function is called again or explicitly altered - valid_size = self.valid_size if valid_size is None else valid_size - self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=self.rng - ) - - # preprocessor to handle missing values, categorical columns encodings, - # and scaling numeric columns - self.preprocessor = make_pipeline( - ColumnTransformer([ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), - OneHotEncoder(sparse=False, handle_unknown="ignore")), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), - StandardScaler()), - cont_idx.tolist(), - ) - ]) - ) - if verbose: - print("Shape of data pre-preprocessing: {}".format(train_X.shape)) - - # preprocessor fit only on the training set - self.train_X = self.preprocessor.fit_transform(self.train_X) - # applying preprocessor built on the training set, across validation and test splits - self.valid_X = self.preprocessor.transform(self.valid_X) - self.test_X = self.preprocessor.transform(self.test_X) - # converting boolean labels to strings - self.train_y = self._convert_labels(self.train_y) - self.valid_y = 
self._convert_labels(self.valid_y) - self.test_y = self._convert_labels(self.test_y) - - # Similar to (https://arxiv.org/pdf/1605.07079.pdf) - # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] - - if verbose: - print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") - - if verbose: - print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) - print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) - print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) - print("\nData loading complete!\n") - return - - def shuffle_data_idx(self, train_id=None, ng=None): rng = self.rng if rng is None else rng - train_idx = self.train_idx if train_idx is None else train_idx - rng.shuffle(train_idx) - return train_idx - - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - - # initializing model model = RandomForestClassifier( **config.get_dictionary(), n_estimators=fidelity['n_estimators'], # a fidelity being used during initialization bootstrap=True, - random_state=self.rng - ) - - # preparing data - if eval == "valid": - train_X = self.train_X - train_y = self.train_y - train_idx = self.train_idx - else: - train_X = np.vstack((self.train_X, self.valid_X)) - train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) - - # shuffling data - if shuffle: - train_idx = self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] - train_y = train_y.iloc[train_idx] - - # subsample here - # application of the other fidelity to the dataset that the model interfaces - train_idx = self.rng.choice( - np.arange(len(train_X)), size=int( - fidelity['subsample'] * len(train_X) - ) + random_state=rng ) - # fitting the model with subsampled data - model.fit(train_X[train_idx], train_y.iloc[train_idx]) - # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss - - def objective( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - def objective_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = 
None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - pass + return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 6e8ec6c9..13076040 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -17,11 +17,10 @@ import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark -class SVMBenchmark(AbstractBenchmark): - _issue_tasks = [3917, 3945] - +class SVMBenchmark(Benchmark): def __init__( self, task_id: Union[int, None] = None, @@ -30,50 +29,33 @@ def __init__( fidelity_choice: int = 1, benchmark_type: str = "raw" ): - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = check_random_state(self.seed) - super(SVMBenchmark, self).__init__(rng=seed) - - self.benchmark_type = benchmark_type - self.task_id = task_id - self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) - #TODO: check the cache_size parameter from sklearn docs + super(SVMBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) self.cache_size = 200 - # Data variables - self.train_X = None - self.valid_X = None - self.test_X = None - self.train_y = None - self.valid_y = None - self.test_y = None - self.train_idx = None - self.test_idx = None - self.task = None - self.dataset = None - self.preprocessor = None - self.lower_bound_train_size = None - self.load_data_from_openml() - - # Observation and fidelity spaces - self.fidelity_choice = fidelity_choice - self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) - self.x_cs = self.get_configuration_space(self.seed) - @staticmethod 
def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) + # cs.add_hyperparameters([ + # CS.UniformFloatHyperparameter( + # 'C', lower=-10., upper=10., default_value=0., log=False + # ), + # CS.UniformFloatHyperparameter( + # 'gamma', lower=-10., upper=10., default_value=1., log=False + # ), + # ]) + # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ CS.UniformFloatHyperparameter( - 'C', lower=-10., upper=10., default_value=0., log=False + "C", 0.03125, 32768, log=True, default_value=1.0 ), CS.UniformFloatHyperparameter( - 'gamma', lower=-10., upper=10., default_value=1., log=False - ), + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) ]) return cs @@ -99,273 +81,14 @@ def get_fidelity_space(seed=None, fidelity_choice=None): z_cs.add_hyperparameter(subsample) return z_cs - def get_config(self, size=None): - """Samples configuration(s) from the (hyper) parameter space - """ - if size is None: # return only one config - return self.x_cs.sample_configuration() - return [self.x_cs.sample_configuration() for i in range(size)] - - def get_fidelity(self, size=None): - """Samples candidate fidelities from the fidelity space - """ - if size is None: # return only one config - return self.z_cs.sample_configuration() - return [self.z_cs.sample_configuration() for i in range(size)] - - def _convert_labels(self, labels): - """Converts boolean labels (if exists) to strings - """ - label_types = list(map(lambda x: isinstance(x, bool), labels)) - if np.all(label_types): - _labels = list(map(lambda x: str(x), labels)) - if isinstance(labels, pd.Series): - labels = pd.Series(_labels, index=labels.index) - elif isinstance(labels, np.array): - labels = np.array(labels) - return labels - - def load_data_from_openml(self, valid_size=None, verbose=False): - """Fetches data from OpenML and initializes the train-validation-test data splits - - The validation set is fixed till this function is called again or explicitly altered - """ - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - - # loads full data - X, y, categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset - self.train_idx, self.test_idx = self.task.get_train_test_split_indices() - train_x = X.iloc[self.train_idx] - train_y = y.iloc[self.train_idx] - self.test_X = X.iloc[self.test_idx] - self.test_y = y.iloc[self.test_idx] - - # splitting training into training and validation - # validation set is fixed till this function is called again or explicitly altered - valid_size = self.valid_size if valid_size is None else valid_size - self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=self.rng - ) - - # preprocessor to handle missing values, categorical columns encodings, - # and scaling numeric columns - self.preprocessor = 
make_pipeline( - ColumnTransformer([ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), - OneHotEncoder(sparse=False, handle_unknown="ignore")), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), - StandardScaler()), - cont_idx.tolist(), - ) - ]) - ) - if verbose: - print("Shape of data pre-preprocessing: {}".format(train_X.shape)) - - # preprocessor fit only on the training set - self.train_X = self.preprocessor.fit_transform(self.train_X) - # applying preprocessor built on the training set, across validation and test splits - self.valid_X = self.preprocessor.transform(self.valid_X) - self.test_X = self.preprocessor.transform(self.test_X) - # converting boolean labels to strings - self.train_y = self._convert_labels(self.train_y) - self.valid_y = self._convert_labels(self.valid_y) - self.test_y = self._convert_labels(self.test_y) - - # Similar to (https://arxiv.org/pdf/1605.07079.pdf) - # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] - - if verbose: - print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") - - if verbose: - print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) - print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) - print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) - print("\nData loading complete!\n") - return - - def shuffle_data_idx(self, train_id=None, ng=None): - rng = self.rng if rng is None else rng - train_idx = self.train_idx if train_idx is None else train_idx - rng.shuffle(train_idx) - return train_idx - - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - + def init_model(self, config, fidelity=None, rng=None): # initializing model rng = self.rng if rng is None else rng config = config.get_dictionary() - for k, v in config.items(): - config[k] = np.exp(float(v)) model = SVC( **config, random_state=rng, cache_size=self.cache_size ) - - # preparing data - if eval == "valid": - train_X = self.train_X - train_y = self.train_y - train_idx = self.train_idx - else: - train_X = np.vstack((self.train_X, self.valid_X)) - train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) - - # shuffling data - if shuffle: - train_idx = self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] - train_y = train_y.iloc[train_idx] - - # subsample here - # application of the other fidelity to the dataset that the model interfaces - train_idx = self.rng.choice( - np.arange(len(train_X)), size=int( - fidelity['subsample'] * len(train_X) - ) - ) - # fitting the model with subsampled data - model.fit(train_X[train_idx], train_y.iloc[train_idx]) - # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss - - def objective( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - 
configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - def objective_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start - - info = { - 'train_loss': train_loss, - 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() - } - - return { - 'function_value': info['val_loss'], - 'cost': info['cost'], - 'info': info - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - pass + return model From df2462dd53a323cd88afca0e9a63862900b69f97 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 18:19:33 +0200 Subject: [PATCH 16/95] Adding placeholder for HistGradientBoostedClassifier --- hpobench/benchmarks/ml/histgb_benchmark.py | 125 +++++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 5 +- 2 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 hpobench/benchmarks/ml/histgb_benchmark.py diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py new file mode 100644 index 00000000..11e7af4a --- /dev/null +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -0,0 +1,125 @@ +import time +import openml +import numpy as np +import pandas as pd +import ConfigSpace as CS +from copy import deepcopy +from typing import Union, Dict + +from 
sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer + +# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html +from sklearn.experimental import enable_hist_gradient_boosting # noqa +from sklearn.ensemble import HistGradientBoostingClassifier + +import hpobench.util.rng_helper as rng_helper +from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark + + +class HistGBBenchmark(Benchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + benchmark_type: str = "raw" + ): + super(HistGBBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, benchmark_type + ) + pass + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformIntegerHyperparameter( + 'max_depth', lower=1, upper=15, default_value=2, log=False + ), + CS.UniformIntegerHyperparameter( + 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True + ), + CS.UniformFloatHyperparameter( + 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=False + ), + #TODO: find best way to encode l2 reg. since log params cannot have 0 as exact bound + # scales the regularization parameter by using it as a power of 10 + # such that the range of the parameter becomes {0, 1e-7, 1e-6, ..., 1e-1} + # where 10 ** 0 is enforced to be 0 (no regularization) + CS.UniformIntegerHyperparameter( + 'l2_regularization', lower=-7, upper=0, default_value=0, log=False + ) # value of 1 indicates 0 regularization + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, 
default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity + """ + rng = self.rng if rng is None else rng + config = deepcopy(config).get_dictionary() + l2 = config.pop("l2_regularization") + l2 = 0 if l2 == 1 else 10 ** l2 + # TODO: decide on encoding of learning rate + #TODO: allow non-encoded categoricals? + #TODO: early stopping set to False? + model = HistGradientBoostingClassifier( + **config, + l2_regularization=l2, + max_iter=fidelity['n_estimators'], # a fidelity being used during initialization + early_stopping=False, + random_state=rng + ) + return model diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index be08b938..960b8271 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -16,7 +16,6 @@ from sklearn.metrics import accuracy_score, make_scorer import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark @@ -85,7 +84,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False ) else: # both n_estimators and subsample as fidelities @@ -93,7 +92,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'n_estimators', lower=2, upper=100, default_value=10, log=False ) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False ) z_cs.add_hyperparameters([ntrees, subsample]) return z_cs From 4d1d2d6a8e0a9de88dcf21af6bf07196eeadb69a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 24 Jun 2021 18:21:34 +0200 Subject: [PATCH 17/95] Minor code cleaning --- hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 13076040..ec174748 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -16,7 +16,6 @@ from sklearn.metrics import accuracy_score, make_scorer import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark From 299e59247715518734379d8085d6b03df441baad Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Sat, 26 Jun 2021 17:44:58 +0200 Subject: [PATCH 18/95] Reformatting output dict + option to add more metrics --- hpobench/benchmarks/ml/histgb_benchmark.py | 3 +- .../benchmarks/ml/ml_benchmark_template.py | 47 ++++++++++++++++--- hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 11e7af4a..769838ae 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -50,8 +50,9 @@ 
def get_configuration_space(seed=None): CS.UniformIntegerHyperparameter( 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True ), + #TODO: fix lr value range error in map_to_config() CS.UniformFloatHyperparameter( - 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=False + 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=True ), #TODO: find best way to encode l2 reg. since log params cannot have 0 as exact bound # scales the regularization parameter by using it as a power of 10 diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 0891f0fe..2b95c097 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -12,12 +12,31 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, make_scorer +from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, \ + top_k_accuracy_score, balanced_accuracy_score import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark +metrics = dict( + #TODO: decide on metrics generalized for different datasets + acc=accuracy_score, + bal_acc=balanced_accuracy_score, + f1=f1_score, + # roc=roc_auc_score, + # topk=top_k_accuracy_score +) +metrics_kwargs = dict( + #TODO: decide on metric parameters + acc=dict(), + bal_acc=dict(), + f1=dict(average="weighted"), + # roc=dict(average="weighted"), + # topk=dict() +) + + class Benchmark(AbstractBenchmark): _issue_tasks = [3917, 3945] @@ -36,7 +55,10 @@ def __init__( self.benchmark_type = benchmark_type self.task_id = task_id self.valid_size = valid_size - self.accuracy_scorer = make_scorer(accuracy_score) + self.scorers = dict() + for k, v in metrics.items(): + self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) + # self.scorers = make_scorer(accuracy_score) # Data variables self.train_X = None @@ -231,7 +253,10 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): # fitting the model with subsampled data model.fit(train_X[train_idx], train_y.iloc[train_idx]) # computing statistics on training data - train_loss = 1 - self.accuracy_scorer(model, train_X, train_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, train_X, train_y) + train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) model_fit_time = time.time() - start return model, model_fit_time, train_loss @@ -255,7 +280,10 @@ def objective( pass start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.valid_X, self.valid_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, self.valid_X, self.valid_y) + val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) eval_time = time.time() - start info = { @@ -264,6 +292,7 @@ def objective( 'cost': model_fit_time + eval_time, 'training_cost': model_fit_time, 'evaluation_cost': eval_time, + 'scores': scores, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -294,22 +323,26 @@ def objective_test( pass start = time.time() - val_loss = 1 - self.accuracy_scorer(model, self.test_X, self.test_y) + scores = dict() + for k, v in self.scorers.items(): + scores[k] = v(model, self.test_X, self.test_y) + test_loss = 1 - scores["acc"] # 
self.accuracy_scorer(model, self.test_X, self.test_y) eval_time = time.time() - start info = { 'train_loss': train_loss, - 'val_loss': val_loss, + 'test_loss': test_loss, 'cost': model_fit_time + eval_time, 'training_cost': model_fit_time, 'evaluation_cost': eval_time, + 'scores': scores, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() } return { - 'function_value': info['val_loss'], + 'function_value': info['test_loss'], 'cost': info['cost'], 'info': info } diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index ec174748..62da5bbc 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -88,6 +88,5 @@ def init_model(self, config, fidelity=None, rng=None): **config, random_state=rng, cache_size=self.cache_size - ) return model From c46321d4d08cdacdd3b5b9a3831154e3a0a6eaab Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 28 Jun 2021 15:46:40 +0200 Subject: [PATCH 19/95] Removing redundant import --- hpobench/benchmarks/ml/histgb_benchmark.py | 1 - hpobench/benchmarks/ml/ml_benchmark_template.py | 1 - hpobench/benchmarks/ml/rf_benchmark.py | 1 - hpobench/benchmarks/ml/svm_benchmark_2.py | 1 - 4 files changed, 4 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 769838ae..0a0461a3 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -19,7 +19,6 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 2b95c097..55772ffc 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -15,7 +15,6 @@ from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, \ top_k_accuracy_score, balanced_accuracy_score -import hpobench.util.rng_helper as rng_helper from hpobench.abstract_benchmark import AbstractBenchmark diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 960b8271..96e3f48c 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -15,7 +15,6 @@ from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 62da5bbc..2747f380 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -15,7 +15,6 @@ from sklearn.pipeline import make_pipeline, Pipeline from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark From 17f663477beb30d5fbe76bd72d7e2ecdc169525a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 30 Jun 2021 18:23:55 +0200 Subject: [PATCH 20/95] Decoupling storage of costs for each metric --- .../benchmarks/ml/ml_benchmark_template.py | 53 ++++++++++--------- 1 file changed, 29 
insertions(+), 24 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 55772ffc..7692e447 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -221,8 +221,6 @@ def init_model(self, config, fidelity=None, rng=None): raise NotImplementedError() def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): - start = time.time() - # initializing model model = self.init_model(config, fidelity, rng) @@ -250,15 +248,18 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): ) ) # fitting the model with subsampled data + start = time.time() model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start # computing statistics on training data scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, train_X, train_y) + score_cost[k] = time.time() - _start train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) - - model_fit_time = time.time() - start - return model, model_fit_time, train_loss + return model, model_fit_time, train_loss, scores, score_cost def objective( self, @@ -271,27 +272,29 @@ def objective( """Function that evaluates a 'config' on a 'fidelity' on the validation set """ if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( configuration, fidelity, shuffle, rng ) else: #TODO: add cases for `tabular` and `surrogate` benchmarks - pass + pass + info['train_costs']['acc'] - start = time.time() scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, self.valid_X, self.valid_y) + score_cost[k] = time.time() - _start val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) - eval_time = time.time() - start info = { 'train_loss': train_loss, 'val_loss': val_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - 'scores': scores, + 'model_cost': model_fit_time, + 'train_scores': train_scores, + 'train_costs': train_score_cost, + 'eval_scores': scores, + 'eval_costs': score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -299,7 +302,7 @@ def objective( return { 'function_value': info['val_loss'], - 'cost': info['cost'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], 'info': info } @@ -314,27 +317,29 @@ def objective_test( """Function that evaluates a 'config' on a 'fidelity' on the test set """ if self.benchmark_type == "raw": - model, model_fit_time, train_loss = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( configuration, fidelity, shuffle, rng, eval="test" ) else: #TODO: add cases for `tabular` and `surrogate` benchmarks pass - start = time.time() scores = dict() + score_cost = dict() for k, v in self.scorers.items(): + _start = time.time() scores[k] = v(model, self.test_X, self.test_y) - test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.test_X, self.test_y) - eval_time = time.time() - start + score_cost[k] = time.time() - _start + test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, 
self.valid_y) info = { 'train_loss': train_loss, - 'test_loss': test_loss, - 'cost': model_fit_time + eval_time, - 'training_cost': model_fit_time, - 'evaluation_cost': eval_time, - 'scores': scores, + 'val_loss': test_loss, + 'model_cost': model_fit_time, + 'train_scores': train_scores, + 'train_costs': train_score_cost, + 'eval_scores': scores, + 'eval_costs': score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -342,7 +347,7 @@ def objective_test( return { 'function_value': info['test_loss'], - 'cost': info['cost'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], 'info': info } From 7de891f5c82b6b12973bca7df148766e50286783 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 1 Jul 2021 16:28:27 +0200 Subject: [PATCH 21/95] Including test scores in objective --- hpobench/benchmarks/ml/histgb_benchmark.py | 7 +- .../benchmarks/ml/ml_benchmark_template.py | 68 +++++++++---------- hpobench/benchmarks/ml/rf_benchmark.py | 7 +- hpobench/benchmarks/ml/svm_benchmark_2.py | 16 +---- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 0a0461a3..ac273c57 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -28,12 +28,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(HistGBBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) pass @staticmethod diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 7692e447..cc543b50 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -44,14 +44,12 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = check_random_state(self.seed) super(Benchmark, self).__init__(rng=seed) - self.benchmark_type = benchmark_type self.task_id = task_id self.valid_size = valid_size self.scorers = dict() @@ -258,7 +256,7 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): _start = time.time() scores[k] = v(model, train_X, train_y) score_cost[k] = time.time() - _start - train_loss = 1 - scores["acc"] # self.accuracy_scorer(model, train_X, train_y) + train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost def objective( @@ -271,21 +269,24 @@ def objective( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( - configuration, fidelity, shuffle, rng - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass + info['train_costs']['acc'] + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + 
configuration, fidelity, shuffle, rng + ) + val_scores = dict() + val_score_cost = dict() + for k, v in self.scorers.items(): + _start = time.time() + val_scores[k] = v(model, self.valid_X, self.valid_y) + val_score_cost[k] = time.time() - _start + val_loss = 1 - val_scores["acc"] - scores = dict() - score_cost = dict() + test_scores = dict() + test_score_cost = dict() for k, v in self.scorers.items(): _start = time.time() - scores[k] = v(model, self.valid_X, self.valid_y) - score_cost[k] = time.time() - _start - val_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) + test_scores[k] = v(model, self.test_X, self.test_y) + test_score_cost[k] = time.time() - _start + val_loss = 1 - test_scores["acc"] info = { 'train_loss': train_loss, @@ -293,8 +294,10 @@ def objective( 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'eval_scores': scores, - 'eval_costs': score_cost, + 'val_scores': val_scores, + 'val_costs': val_score_cost, + 'test_scores': test_scores, + 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -302,7 +305,7 @@ def objective( return { 'function_value': info['val_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['val_costs']['acc'], 'info': info } @@ -316,21 +319,16 @@ def objective_test( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set """ - if self.benchmark_type == "raw": - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( - configuration, fidelity, shuffle, rng, eval="test" - ) - else: - #TODO: add cases for `tabular` and `surrogate` benchmarks - pass - - scores = dict() - score_cost = dict() + model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + configuration, fidelity, shuffle, rng, eval="test" + ) + test_scores = dict() + test_score_cost = dict() for k, v in self.scorers.items(): _start = time.time() - scores[k] = v(model, self.test_X, self.test_y) - score_cost[k] = time.time() - _start - test_loss = 1 - scores["acc"] # self.accuracy_scorer(model, self.valid_X, self.valid_y) + test_scores[k] = v(model, self.test_X, self.test_y) + test_score_cost[k] = time.time() - _start + test_loss = 1 - test_scores["acc"] info = { 'train_loss': train_loss, @@ -338,8 +336,8 @@ def objective_test( 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'eval_scores': scores, - 'eval_costs': score_cost, + 'test_scores': test_scores, + 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity.get_dictionary(), 'config': configuration.get_dictionary() @@ -347,7 +345,7 @@ def objective_test( return { 'function_value': info['test_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['eval_costs']['acc'], + 'cost': model_fit_time + info['train_costs']['acc'] + info['test_costs']['acc'], 'info': info } diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 96e3f48c..7426a37a 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -24,12 +24,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: 
float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(RandomForestBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(RandomForestBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) pass @staticmethod diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 2747f380..12d22afa 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -24,12 +24,9 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - benchmark_type: str = "raw" + fidelity_choice: int = 1 ): - super(SVMBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, benchmark_type - ) + super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) self.cache_size = 200 @staticmethod @@ -37,15 +34,6 @@ def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) - - # cs.add_hyperparameters([ - # CS.UniformFloatHyperparameter( - # 'C', lower=-10., upper=10., default_value=0., log=False - # ), - # CS.UniformFloatHyperparameter( - # 'gamma', lower=-10., upper=10., default_value=1., log=False - # ), - # ]) # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ CS.UniformFloatHyperparameter( From ec316c3a8ffc89c3224192ea66ed938c74d2ec53 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 1 Jul 2021 16:45:43 +0200 Subject: [PATCH 22/95] Documenting the structure of information in each fn eval. --- hpobench/benchmarks/ml/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 hpobench/benchmarks/ml/README.md diff --git a/hpobench/benchmarks/ml/README.md b/hpobench/benchmarks/ml/README.md new file mode 100644 index 00000000..46ad4e08 --- /dev/null +++ b/hpobench/benchmarks/ml/README.md @@ -0,0 +1,29 @@ +Each function evalution returns a dictionary with the following information: + +``` +└───function_value: 1 - accuracy (acc.) on validation set +└───cost: time to fit model + time to evaluate acc. training set + time to evaluate acc. validation set +└───info: dictionary (dict) with miscellaneous information +| └───train_loss: 1 - accuracy (acc.) on training set +| └───val_loss: 1 - accuracy (acc.) on validation set +| └───model_cost: time taken to fit the model +| └───train_scores: performance on all metrics over the training set (dict) +| | └───f1: F1-score +| | └───acc: Accuracy +| | └───bal_acc: Balanced accuracy +| └───train_costs: time taken to compute performance on all metrics over the training set (dict) +| | └───f1: F1-score +| | └───acc: Accuracy +| | └───bal_acc: Balanced accuracy +| └───valid_scores: performance on all metrics over the validation set (dict) +| | └───... +| └───valid_costs: time taken to compute performance on all metrics over the validation set (dict) +| | └───... +| └───test_scores: performance on all metrics over the test set +| | └───... +| └───test_costs: time taken to compute performance on all metrics over the test set (dict) +| | └───... +``` + +*NOTE*: the keys `function_value`, `cost`, `info` need to exist when creating a new objective +function, while `info` can house any kind of auxilliary information required. 
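A minimal usage sketch for reading this dictionary is shown below. It assumes the `RandomForestBenchmark` defined earlier in this patch series, the OpenML task id used in the repository README example (167149), and the `objective`/`objective_test` method names as they exist at this point in the series (they are renamed to `objective_function`/`objective_function_test` in a later patch); treat it as illustrative rather than a fixed API.

```python
# Illustrative sketch only -- class name, method names and task id are assumptions
# taken from elsewhere in this patch series, not a guaranteed public API.
from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark

benchmark = RandomForestBenchmark(task_id=167149, seed=1)
config = benchmark.get_configuration_space(seed=1).sample_configuration()
fidelity = benchmark.get_fidelity_space(seed=1, fidelity_choice=1).sample_configuration()

result = benchmark.objective(config, fidelity)   # evaluates on the validation set
print(result['function_value'])                  # 1 - validation accuracy
print(result['cost'])                            # fit time + time to score accuracy on train/valid
print(result['info']['train_scores'])            # all training-set metrics (acc, bal_acc, f1)
print(result['info']['fidelity'], result['info']['config'])
```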
\ No newline at end of file From e7f69b9e87731952d507d9db28cef42957be5a5b Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 2 Jul 2021 17:04:26 +0200 Subject: [PATCH 23/95] Some decisions on lower bound for subsample fidelity --- hpobench/benchmarks/ml/ml_benchmark_template.py | 5 +++-- hpobench/benchmarks/ml/svm_benchmark_2.py | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index cc543b50..3ad61b54 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -194,8 +194,9 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) # np.unique(self.train_y).shape[0] + n_classes = len(self.task.class_labels) self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) if verbose: print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") @@ -332,7 +333,7 @@ def objective_test( info = { 'train_loss': train_loss, - 'val_loss': test_loss, + 'test_loss': test_loss, 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py index 12d22afa..845f40e0 100644 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ b/hpobench/benchmarks/ml/svm_benchmark_2.py @@ -45,8 +45,8 @@ def get_configuration_space(seed=None): ]) return cs - @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=None): + @classmethod + def get_fidelity_space(cls, seed=None, fidelity_choice=None): """Fidelity space available --- specifies the fidelity dimensions For SVM, only a single fidelity exists, i.e., subsample fraction. 
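The next hunk adds a TODO about deriving the subsample lower bound dynamically from 1/512 and `lower_bound_train_size`. As a rough illustration (not part of the patch), the bound this commit introduces in `ml_benchmark_template.py` amounts to requiring at least ten samples per class while never dropping below 1/512 of the training set; a standalone sketch of that rule, with an invented example task, could look like:

```python
import numpy as np

def subsample_lower_bound(n_train: int, n_classes: int) -> float:
    # Lower bound on the subsample fraction: at least 10 samples per class
    # (cf. https://arxiv.org/pdf/1605.07079.pdf), but never below 1/512 of
    # the training set.
    return float(np.max((1 / 512, (10 * n_classes) / n_train)))

# Hypothetical task: 10 classes, 2000 training points -> lower bound of 0.05
print(subsample_lower_bound(n_train=2000, n_classes=10))
```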
@@ -57,12 +57,14 @@ def get_fidelity_space(seed=None, fidelity_choice=None): """ z_cs = CS.ConfigurationSpace(seed=seed) - subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: subsample = CS.Constant('subsample', value=1) else: + # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True + lower = 0.1 subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=0.33, log=False + 'subsample', lower=lower, upper=1, default_value=0.33, log=False ) z_cs.add_hyperparameter(subsample) return z_cs From edb3e7fedd5010bab9a65ba2e5b21e708cf8c4e3 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 6 Jul 2021 20:20:27 +0200 Subject: [PATCH 24/95] AbstractBenchmark update for fidelity option + including XGBoost --- README.md | 7 +- examples/local/xgboost_local.py | 2 +- hpobench/abstract_benchmark.py | 7 +- hpobench/benchmarks/ml/histgb_benchmark.py | 4 +- .../benchmarks/ml/ml_benchmark_template.py | 82 +-- hpobench/benchmarks/ml/rf_benchmark.py | 4 +- hpobench/benchmarks/ml/svm_benchmark.py | 395 +++----------- hpobench/benchmarks/ml/svm_benchmark_2.py | 81 --- hpobench/benchmarks/ml/svm_benchmark_old.py | 350 ++++++++++++ hpobench/benchmarks/ml/xgboost_benchmark.py | 515 ++++-------------- .../benchmarks/ml/xgboost_benchmark_old.py | 426 +++++++++++++++ tests/test_utils.py | 2 +- tests/test_whitebox.py | 2 +- 13 files changed, 1004 insertions(+), 873 deletions(-) delete mode 100644 hpobench/benchmarks/ml/svm_benchmark_2.py create mode 100644 hpobench/benchmarks/ml/svm_benchmark_old.py create mode 100644 hpobench/benchmarks/ml/xgboost_benchmark_old.py diff --git a/README.md b/README.md index ff34f75a..a015792b 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,14 @@ Further requirements are: [ConfigSpace](https://github.com/automl/ConfigSpace), This can be arbitrarily complex and further information can be found in the docstring of the benchmark. 
A simple example is the XGBoost benchmark which can be installed with `pip install .[xgboost]` + ```python -from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark +from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark + b = XGBoostBenchmark(task_id=167149) config = b.get_configuration_space(seed=1).sample_configuration() -result_dict = b.objective_function(configuration=config, fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1) +result_dict = b.objective_function(configuration=config, + fidelity={"n_estimators": 128, "dataset_fraction": 0.5}, rng=1) ``` diff --git a/examples/local/xgboost_local.py b/examples/local/xgboost_local.py index 47c1f77f..4f3b3ad3 100644 --- a/examples/local/xgboost_local.py +++ b/examples/local/xgboost_local.py @@ -10,7 +10,7 @@ import argparse from time import time -from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark +from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark from hpobench.util.openml_data_manager import get_openmlcc18_taskids diff --git a/hpobench/abstract_benchmark.py b/hpobench/abstract_benchmark.py index 5d7bc994..abbbcb22 100644 --- a/hpobench/abstract_benchmark.py +++ b/hpobench/abstract_benchmark.py @@ -226,12 +226,17 @@ def get_configuration_space(seed: Union[int, None] = None) -> ConfigSpace.Config @staticmethod @abc.abstractmethod - def get_fidelity_space(seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space( + seed: Union[int, None] = None, fidelity_choice: Union[int, None] = None + ) -> ConfigSpace.ConfigurationSpace: """ Defines the available fidelity parameters as a "fidelity space" for each benchmark. Parameters ---------- seed: int, None Seed for the fidelity space. 
+ fidelity_choice: int, None + integer value to choose the type of fidelity space + Returns ------- ConfigSpace.ConfigurationSpace diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index ac273c57..21ed4ec0 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -19,10 +19,10 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark -class HistGBBenchmark(Benchmark): +class HistGBBenchmark(MLBenchmark): def __init__( self, task_id: Union[int, None] = None, diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 3ad61b54..e0ab59bc 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -36,7 +36,7 @@ ) -class Benchmark(AbstractBenchmark): +class MLBenchmark(AbstractBenchmark): _issue_tasks = [3917, 3945] def __init__( @@ -48,7 +48,7 @@ def __init__( ): self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = check_random_state(self.seed) - super(Benchmark, self).__init__(rng=seed) + super(MLBenchmark, self).__init__(rng=seed) self.task_id = task_id self.valid_size = valid_size @@ -84,7 +84,7 @@ def get_configuration_space(seed=None): raise NotImplementedError() @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def get_fidelity_space(seed=None, fidelity_choice=None): """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -194,8 +194,8 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - n_classes = len(self.task.class_labels) - self.lower_bound_train_size = (10 * n_classes) / self.train_X.shape[0] + self.n_classes = len(self.task.class_labels) + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) if verbose: @@ -219,7 +219,7 @@ def init_model(self, config, fidelity=None, rng=None): """ raise NotImplementedError() - def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): + def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): # initializing model model = self.init_model(config, fidelity, rng) @@ -260,7 +260,7 @@ def _raw_objective(self, config, fidelity, shuffle, rng, eval="valid"): train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost - def objective( + def objective_function( self, configuration: Union[CS.Configuration, Dict], fidelity: Union[CS.Configuration, Dict, None] = None, @@ -270,7 +270,7 @@ def objective( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( configuration, fidelity, shuffle, rng ) val_scores = dict() @@ -310,7 +310,7 @@ def objective( 'info': info } - def objective_test( + def objective_function_test( self, configuration: Union[CS.Configuration, Dict], fidelity: Union[CS.Configuration, Dict, None] = None, @@ -320,7 
+320,7 @@ def objective_test( ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._raw_objective( + model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( configuration, fidelity, shuffle, rng, eval="test" ) test_scores = dict() @@ -350,34 +350,40 @@ def objective_test( 'info': info } - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the validation set - """ - return dict() - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: - """Function that evaluates a 'config' on a 'fidelity' on the test set - """ - return dict() + # # pylint: disable=arguments-differ + # @AbstractBenchmark.check_parameters + # def objective_function( + # self, + # configuration: Union[CS.Configuration, Dict], + # fidelity: Union[CS.Configuration, Dict, None] = None, + # shuffle: bool = False, + # rng: Union[np.random.RandomState, int, None] = None, + # **kwargs + # ) -> Dict: + # """Function that evaluates a 'config' on a 'fidelity' on the validation set + # """ + # return dict() + # + # # pylint: disable=arguments-differ + # @AbstractBenchmark.check_parameters + # def objective_function_test( + # self, + # configuration: Union[CS.Configuration, Dict], + # fidelity: Union[CS.Configuration, Dict, None] = None, + # shuffle: bool = False, + # rng: Union[np.random.RandomState, int, None] = None, + # **kwargs + # ) -> Dict: + # """Function that evaluates a 'config' on a 'fidelity' on the test set + # """ + # return dict() def get_meta_information(self): """ Returns the meta information for the benchmark """ - pass + return {'name': 'Support Vector Machine', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 7426a37a..b815e1bd 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -15,10 +15,10 @@ from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, make_scorer -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark -class RandomForestBenchmark(Benchmark): +class RandomForestBenchmark(MLBenchmark): def __init__( self, task_id: Union[int, None] = None, diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 9aad5e44..1d0e2d00 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -1,354 +1,81 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. 
- Works with HPOBench >= v0.0.8 - -0.0.2: -* Standardize the structure of the meta information - -0.0.1: -* First implementation - -""" - -import logging import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS +import openml import numpy as np -from scipy import sparse -from sklearn import pipeline -from sklearn import svm -from sklearn.compose import ColumnTransformer +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + +from sklearn.svm import SVC from sklearn.impute import SimpleImputer +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline, Pipeline from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder, MinMaxScaler - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('SVMBenchmark') - - -class SupportVectorMachine(AbstractBenchmark): - """ - Hyperparameter optimization task to optimize the regularization - parameter C and the kernel parameter gamma of a support vector machine. - Both hyperparameters are optimized on a log scale in [-10, 10]. - The X_test data set is only used for a final offline evaluation of - a configuration. For that the validation and training data is - concatenated to form the whole training data set. - """ - - def __init__(self, task_id: Union[int, None] = None, - rng: Union[np.random.RandomState, int, None] = None): - """ - Parameters - ---------- - task_id : int, None - rng : np.random.RandomState, int, None - """ - super(SupportVectorMachine, self).__init__(rng=rng) - self.task_id = task_id - self.cache_size = 200 # Cache for the SVC in MB - self.accuracy_scorer = make_scorer(accuracy_score) +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # Sort data (Categorical + numerical) so that categorical and continous are not mixed. 
- categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM model - fidelity: Dict, None - Fidelity parameters for the SVM model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : training loss - fidelity : used fidelities in this evaluation - """ - start_time = time.time() - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - # Split of dataset subset - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_size = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_size = fidelity['dataset_fraction'] - train_size = int(train_size * len(self.train_idx)) - train_idx = self.train_idx[:train_size] - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - # Train support vector machine - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(self.x_train[train_idx], self.y_train[train_idx]) - - # Compute validation error - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - - cost = time.time() - start_time - - return {'function_value': float(val_loss), - "cost": cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity}} - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model with a given configuration on both the X_train - and validation data set and evaluates the model on the X_test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : X_test loss - cost : time to X_train and evaluate the model - info : Dict - train_valid_loss: Loss on the train+valid data set - fidelity : used fidelities in this evaluation - """ - assert np.isclose(fidelity['dataset_fraction'], 1), \ - f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start_time = time.time() - - # Concatenate training and validation dataset - if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): - data = sparse.vstack((self.x_train, self.x_valid)) - else: - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(data, targets) - - # Compute validation error - train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) - - # Compute test error - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - - cost = time.time() - start_time - - return {'function_value': float(test_loss), - "cost": cost, - 'info': {'train_valid_loss': float(train_valid_loss), - 'fidelity': fidelity}} - - def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - - model = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), - ('svm', - svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) - ]) - return model +class SVMBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1 + ): + super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + self.cache_size = 200 @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the SVM Model - - For a detailed explanation of the hyperparameters: - https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters """ - - seed = seed if seed is not None else np.random.randint(1, 100000) cs = CS.ConfigurationSpace(seed=seed) - + # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), - CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), + CS.UniformFloatHyperparameter( + "C", 0.03125, 32768, log=True, 
default_value=1.0 + ), + CS.UniformFloatHyperparameter( + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) ]) - # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) return cs @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the SupportVector Benchmark - - Fidelities - ---------- - dataset_fraction: float - [0.1, 1] - fraction of training data set to use + def get_fidelity_space(seed=None, fidelity_choice=None): + """Fidelity space available --- specifies the fidelity dimensions - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace + For SVM, only a single fidelity exists, i.e., subsample fraction. + if fidelity_choice == 0 + uses the entire data (subsample=1), reflecting the black-box setup + else + parameterizes the fraction of data to subsample - Returns - ------- - ConfigSpace.ConfigurationSpace """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) + z_cs = CS.ConfigurationSpace(seed=seed) - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - ]) - return fidel_space - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - return {'name': 'Support Vector Machine', - 'references': ["@InProceedings{pmlr-v54-klein17a", - "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " - "Frank Hutter}, " - "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " - "Large Datasets}}" - "pages = {528--536}, year = {2017}," - "editor = {Aarti Singh and Jerry Zhu}," - "volume = {54}," - "series = {Proceedings of Machine Learning Research}," - "address = {Fort Lauderdale, FL, USA}," - "month = {20--22 Apr}," - "publisher = {PMLR}," - "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " - "url = {http://proceedings.mlr.press/v54/klein17a.html}, " - ], - 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } + if fidelity_choice == 0: + subsample = CS.Constant('subsample', value=1) + else: + # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True + lower = 0.1 + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=lower, upper=1, default_value=0.33, log=False + ) + z_cs.add_hyperparameter(subsample) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + # initializing model + rng = self.rng if rng is None else rng + config = config.get_dictionary() + model = SVC( + **config, + random_state=rng, + cache_size=self.cache_size + ) + return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_2.py b/hpobench/benchmarks/ml/svm_benchmark_2.py deleted file mode 100644 index 845f40e0..00000000 --- a/hpobench/benchmarks/ml/svm_benchmark_2.py +++ /dev/null @@ -1,81 +0,0 @@ -import time -import openml -import numpy as np -import pandas as pd -import ConfigSpace as CS -from typing import Union, Dict - -from sklearn.svm import SVC -from sklearn.impute import SimpleImputer -from sklearn.utils import check_random_state -from 
sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline, Pipeline -from sklearn.metrics import accuracy_score, make_scorer - -from hpobench.benchmarks.ml.ml_benchmark_template import Benchmark - - -class SVMBenchmark(Benchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1 - ): - super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) - self.cache_size = 200 - - @staticmethod - def get_configuration_space(seed=None): - """Parameter space to be optimized --- contains the hyperparameters - """ - cs = CS.ConfigurationSpace(seed=seed) - # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter( - "C", 0.03125, 32768, log=True, default_value=1.0 - ), - CS.UniformFloatHyperparameter( - "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 - ) - ]) - return cs - - @classmethod - def get_fidelity_space(cls, seed=None, fidelity_choice=None): - """Fidelity space available --- specifies the fidelity dimensions - - For SVM, only a single fidelity exists, i.e., subsample fraction. - if fidelity_choice == 0 - uses the entire data (subsample=1), reflecting the black-box setup - else - parameterizes the fraction of data to subsample - - """ - z_cs = CS.ConfigurationSpace(seed=seed) - - if fidelity_choice == 0: - subsample = CS.Constant('subsample', value=1) - else: - # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True - lower = 0.1 - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=lower, upper=1, default_value=0.33, log=False - ) - z_cs.add_hyperparameter(subsample) - return z_cs - - def init_model(self, config, fidelity=None, rng=None): - # initializing model - rng = self.rng if rng is None else rng - config = config.get_dictionary() - model = SVC( - **config, - random_state=rng, - cache_size=self.cache_size - ) - return model diff --git a/hpobench/benchmarks/ml/svm_benchmark_old.py b/hpobench/benchmarks/ml/svm_benchmark_old.py new file mode 100644 index 00000000..0a765e45 --- /dev/null +++ b/hpobench/benchmarks/ml/svm_benchmark_old.py @@ -0,0 +1,350 @@ +""" + +Changelog: +========== +0.0.2: +* Standardize the structure of the meta information + +0.0.1: +* First implementation + +""" + +import logging +import time +from typing import Union, Tuple, Dict, List + +import ConfigSpace as CS +import numpy as np +from scipy import sparse +from sklearn import pipeline +from sklearn import svm +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.metrics import accuracy_score, make_scorer +from sklearn.preprocessing import OneHotEncoder, MinMaxScaler + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager + +__version__ = '0.0.2' + +logger = logging.getLogger('SVMBenchmark') + + +class SupportVectorMachine(AbstractBenchmark): + """ + Hyperparameter optimization task to optimize the regularization + parameter C and the kernel parameter gamma of a support vector machine. 
+ Both hyperparameters are optimized on a log scale in [-10, 10]. + The X_test data set is only used for a final offline evaluation of + a configuration. For that the validation and training data is + concatenated to form the whole training data set. + """ + + def __init__(self, task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None): + """ + Parameters + ---------- + task_id : int, None + rng : np.random.RandomState, int, None + """ + super(SupportVectorMachine, self).__init__(rng=rng) + + self.task_id = task_id + self.cache_size = 200 # Cache for the SVC in MB + self.accuracy_scorer = make_scorer(accuracy_score) + + self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ + self.get_data() + self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) + + # Sort data (Categorical + numerical) so that categorical and continous are not mixed. + categorical_idx = np.argwhere(self.categorical_data) + continuous_idx = np.argwhere(~self.categorical_data) + sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() + self.categorical_data = self.categorical_data[sorting] + self.x_train = self.x_train[:, sorting] + self.x_valid = self.x_valid[:, sorting] + self.x_test = self.x_test[:, sorting] + + nan_columns = np.all(np.isnan(self.x_train), axis=0) + self.categorical_data = self.categorical_data[~nan_columns] + self.x_train, self.x_valid, self.x_test, self.categories = \ + OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, + is_categorical=self.categorical_data) + + self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), + size=len(self.x_train), + replace=False) + + # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] + # (https://arxiv.org/pdf/1605.07079.pdf), + # use 10 time the number of classes as lower bound for the dataset fraction + n_classes = np.unique(self.y_train).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] + + def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: + """ Loads the data given a task or another source. """ + + assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' + 'overwrite the get_data method.') + + data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) + x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() + + return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types + + def shuffle_data(self, rng=None): + """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the + class-random-state""" + random_state = rng_helper.get_rng(rng, self.rng) + random_state.shuffle(self.train_idx) + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a SVM model given a hyperparameter configuration and + evaluates the model on the validation set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the SVM model + fidelity: Dict, None + Fidelity parameters for the SVM model, check get_fidelity_space(). 
Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : validation loss + cost : time to train and evaluate the model + info : Dict + train_loss : training loss + fidelity : used fidelities in this evaluation + """ + start_time = time.time() + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + # Split of dataset subset + if self.lower_bound_train_size > fidelity['dataset_fraction']: + train_size = self.lower_bound_train_size + logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' + f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' + f'{self.lower_bound_train_size:.8f}') + else: + train_size = fidelity['dataset_fraction'] + + train_size = int(train_size * len(self.train_idx)) + train_idx = self.train_idx[:train_size] + + # Transform hyperparameters to linear scale + hp_c = np.exp(float(configuration['C'])) + hp_gamma = np.exp(float(configuration['gamma'])) + + # Train support vector machine + model = self.get_pipeline(hp_c, hp_gamma) + model.fit(self.x_train[train_idx], self.y_train[train_idx]) + + # Compute validation error + train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) + val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) + + cost = time.time() - start_time + + return {'function_value': float(val_loss), + "cost": cost, + 'info': {'train_loss': float(train_loss), + 'fidelity': fidelity}} + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a SVM model with a given configuration on both the X_train + and validation data set and evaluates the model on the X_test data set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the SVM Model + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. 
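As a small numeric illustration of that transformation (the configuration values are made up): the search space defined further below places C and gamma on a natural-log scale in [-10, 10], so a sampled value is exponentiated before reaching the SVC, giving an effective range of roughly [4.5e-5, 2.2e4].

    import numpy as np

    configuration = {'C': 2.3, 'gamma': -4.0}              # made-up point from the [-10, 10] space
    hp_c = np.exp(float(configuration['C']))               # about 9.97, passed to SVC(C=...)
    hp_gamma = np.exp(float(configuration['gamma']))       # about 0.018, passed to SVC(gamma=...)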
+ kwargs + + Returns + ------- + Dict - + function_value : X_test loss + cost : time to X_train and evaluate the model + info : Dict + train_valid_loss: Loss on the train+valid data set + fidelity : used fidelities in this evaluation + """ + assert np.isclose(fidelity['dataset_fraction'], 1), \ + f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start_time = time.time() + + # Concatenate training and validation dataset + if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): + data = sparse.vstack((self.x_train, self.x_valid)) + else: + data = np.concatenate((self.x_train, self.x_valid)) + targets = np.concatenate((self.y_train, self.y_valid)) + + # Transform hyperparameters to linear scale + hp_c = np.exp(float(configuration['C'])) + hp_gamma = np.exp(float(configuration['gamma'])) + + model = self.get_pipeline(hp_c, hp_gamma) + model.fit(data, targets) + + # Compute validation error + train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) + + # Compute test error + test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) + + cost = time.time() - start_time + + return {'function_value': float(test_loss), + "cost": cost, + 'info': {'train_valid_loss': float(train_valid_loss), + 'fidelity': fidelity}} + + def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + + model = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), + ('svm', + svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) + ]) + return model + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all parameters for + the SVM Model + + For a detailed explanation of the hyperparameters: + https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), + CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), + ]) + # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) + return cs + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the SupportVector Benchmark + + Fidelities + ---------- + dataset_fraction: float - [0.1, 1] + fraction of training data set to use + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None 
else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), + ]) + return fidel_space + + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + return {'name': 'Support Vector Machine', + 'references': ["@InProceedings{pmlr-v54-klein17a", + "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " + "Frank Hutter}, " + "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " + "Large Datasets}}" + "pages = {528--536}, year = {2017}," + "editor = {Aarti Singh and Jerry Zhu}," + "volume = {54}," + "series = {Proceedings of Machine Learning Research}," + "address = {Fort Lauderdale, FL, USA}," + "month = {20--22 Apr}," + "publisher = {PMLR}," + "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " + "url = {http://proceedings.mlr.press/v54/klein17a.html}, " + ], + 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index e956d5a4..b038e4c9 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -1,430 +1,125 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. - Works with HPOBench >= v0.0.8 - -0.0.2: -* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) - eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) - min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) - colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) - reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) - max_depth: - -> [1, 15] (def: 6) - subsample_per_it: - -> [0.01, 1] (def: 1) - [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) - - *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. - -* Increase the fidelity `n_estimators` - n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) - -* Add class to optimize also the used booster method: (gbtree, gblinear or dart) - We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more - about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). - - -0.0.1: -* First implementation of a XGBoost Benchmark. 
- - -""" - -import logging import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS +import openml import numpy as np +import pandas as pd +import ConfigSpace as CS +from typing import Union, Dict + import xgboost as xgb -from sklearn import pipeline -from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, make_scorer -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('XGBBenchmark') - - -class XGBoostBenchmark(AbstractBenchmark): - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - """ - - Parameters - ---------- - task_id : int, None - n_threads : int, None - rng : np.random.RandomState, int, None - """ - - super(XGBoostBenchmark, self).__init__(rng=rng) - self.n_threads = n_threads - self.task_id = task_id - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. - categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - # Determine the number of categories in the labels. - # In case of binary classification ``self.num_class`` has to be 1 for xgboost. - self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) - self.num_class = 1 if self.num_class == 2 else self.num_class - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. 
Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost model - fidelity: Dict, None - Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : trainings loss - fidelity : used fidelities in this evaluation - """ - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_data_fraction = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_data_fraction = fidelity['dataset_fraction'] - - train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] - - model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) - model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) - - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - cost = time.time() - start - - return {'function_value': float(val_loss), - 'cost': cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity} - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model with a given configuration on both the train - and validation data set and evaluates the model on the test data set. 
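A compact sketch of how the dataset_fraction fidelity is applied in the training step above (the array length is invented): the pre-shuffled index array is simply truncated to the requested fraction before fitting.

    import numpy as np

    rng = np.random.RandomState(0)
    train_idx = rng.permutation(1000)                      # stands in for self.train_idx; length invented
    dataset_fraction = 0.25                                # requested fidelity value
    subset_idx = train_idx[:int(len(train_idx) * dataset_fraction)]  # first 250 shuffled indices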
- - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : test loss - cost : time to train and evaluate the model - info : Dict - fidelity : used fidelities in this evaluation - """ - default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value - if fidelity['dataset_fraction'] != default_dataset_fraction: - raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' - f'{default_dataset_fraction}') - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - # Impute potential nan values with the feature- - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) - model.fit(X=data, y=targets) +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - cost = time.time() - start - return {'function_value': float(test_loss), - 'cost': cost, - 'info': {'fidelity': fidelity}} +class XGBoostBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1 + ): + super(XGBoostBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + pass @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the XGBoost Model - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), - CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), - CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), - CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) 
+ CS.UniformFloatHyperparameter( + 'eta', lower=2**-10, upper=1., default_value=0.3, log=True + ), # learning rate + CS.UniformIntegerHyperparameter( + 'max_depth', lower=1, upper=15, default_value=6, log=False + ), + CS.UniformFloatHyperparameter( + 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True + ), + CS.UniformFloatHyperparameter( + 'colsample_bytree', lower=0.01, upper=1., default_value=1. + ), + # CS.UniformFloatHyperparameter( + # 'colsample_bylevel', lower=0.01, upper=1., default_value=1. + # ), + CS.UniformFloatHyperparameter( + 'reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True + ), + # CS.UniformFloatHyperparameter( + # 'reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True + # ), + # CS.UniformFloatHyperparameter( + # 'subsample_per_it', lower=0.1, upper=1, default_value=1, log=False + # ) ]) - return cs @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the XGBoost Benchmark - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace + z_cs = CS.ConfigurationSpace(seed=seed) + subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) + if fidelity_choice == 0: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + # only n_estimators as fidelity + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + # only subsample as fidelity + ntrees = CS.Constant('n_estimators', value=100) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + else: + # both n_estimators and subsample as fidelities + ntrees = CS.UniformIntegerHyperparameter( + 'n_estimators', lower=2, upper=100, default_value=10, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + ) + z_cs.add_hyperparameters([ntrees, subsample]) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) - ]) - - return fidel_space - - def get_meta_information(self) -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 
'XGBoost', - 'references': ['@article{probst2019tunability,' - 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' - 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' - 'journal={J. Mach. Learn. Res.},' - 'volume={20},' - 'number={53},' - 'pages={1--32},' - 'year={2019}' - '}'], - 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' - 'xgboost_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } - - def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, - colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, - n_estimators: int, subsample_per_it: float) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier( - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it)) - ]) - return clf - - -class XGBoostExtendedBenchmark(XGBoostBenchmark): - """ - Similar to XGBoostBenchmark but enables also the optimization of the used booster. - """ - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = XGBoostBenchmark.get_configuration_space(seed) - hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], - default_value='gbtree') - cs.add_hyperparameter(hp_booster) - - # XGBoost with 'gblinear' can not use some - # parameters. Exclude them from the configuration space by introducing a condition. - hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] - - # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
- conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] - cs.add_conditions(conditions) - return cs - - # noinspection PyMethodOverriding - # pylint: disable=arguments-differ - def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, - min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, - colsample_bylevel: float = None, subsample_per_it: float = None) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - configuration = dict(booster=booster, - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it) - - configuration = {k: v for k, v in configuration.items() if v is not None} - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier(**configuration)) - ]) - return clf + rng = rng if (rng is None and isinstance(rng, int)) else self.seed + extra_args = dict( + n_estimators=fidelity['n_estimators'], + objective="binary:logistic", + random_state=rng, + subsample=1 + ) + if self.n_classes > 2: + extra_args["objective"] = "multi:softmax" + extra_args.update({"num_class": self.n_classes}) + model = xgb.XGBClassifier( + **config.get_dictionary(), + **extra_args + ) + return model diff --git a/hpobench/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/benchmarks/ml/xgboost_benchmark_old.py new file mode 100644 index 00000000..fb380c89 --- /dev/null +++ b/hpobench/benchmarks/ml/xgboost_benchmark_old.py @@ -0,0 +1,426 @@ +""" + +Changelog: +========== +0.0.2: +* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) + eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) + min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) + colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) + colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) + reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) + reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) + max_depth: - -> [1, 15] (def: 6) + subsample_per_it: - -> [0.01, 1] (def: 1) + [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) + + *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. + +* Increase the fidelity `n_estimators` + n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) + +* Add class to optimize also the used booster method: (gbtree, gblinear or dart) + We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more + about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). + + +0.0.1: +* First implementation of a XGBoost Benchmark. 
+ + +""" + +import logging +import time +from typing import Union, Tuple, Dict, List + +import ConfigSpace as CS +import numpy as np +import xgboost as xgb +from sklearn import pipeline +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.metrics import accuracy_score, make_scorer +from sklearn.preprocessing import OneHotEncoder + +import hpobench.util.rng_helper as rng_helper +from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager + +__version__ = '0.0.2' + +logger = logging.getLogger('XGBBenchmark') + + +class XGBoostBenchmark(AbstractBenchmark): + + def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, + rng: Union[np.random.RandomState, int, None] = None): + """ + + Parameters + ---------- + task_id : int, None + n_threads : int, None + rng : np.random.RandomState, int, None + """ + + super(XGBoostBenchmark, self).__init__(rng=rng) + self.n_threads = n_threads + self.task_id = task_id + self.accuracy_scorer = make_scorer(accuracy_score) + + self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ + self.get_data() + self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) + + # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. + categorical_idx = np.argwhere(self.categorical_data) + continuous_idx = np.argwhere(~self.categorical_data) + sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() + self.categorical_data = self.categorical_data[sorting] + self.x_train = self.x_train[:, sorting] + self.x_valid = self.x_valid[:, sorting] + self.x_test = self.x_test[:, sorting] + + nan_columns = np.all(np.isnan(self.x_train), axis=0) + self.categorical_data = self.categorical_data[~nan_columns] + + self.x_train, self.x_valid, self.x_test, self.categories = \ + OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, + is_categorical=self.categorical_data) + + # Determine the number of categories in the labels. + # In case of binary classification ``self.num_class`` has to be 1 for xgboost. + self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) + self.num_class = 1 if self.num_class == 2 else self.num_class + + self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), + size=len(self.x_train), + replace=False) + + # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] + # (https://arxiv.org/pdf/1605.07079.pdf), + # use 10 time the number of classes as lower bound for the dataset fraction + n_classes = np.unique(self.y_train).shape[0] + self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] + + def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: + """ Loads the data given a task or another source. """ + + assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' + 'overwrite the get_data method.') + + data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) + x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() + + return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types + + def shuffle_data(self, rng=None): + """ Reshuffle the training data. 
If 'rng' is None, the training idx are shuffled according to the + class-random-state""" + random_state = rng_helper.get_rng(rng, self.rng) + random_state.shuffle(self.train_idx) + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a XGBoost model given a hyperparameter configuration and + evaluates the model on the validation set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the XGBoost model + fidelity: Dict, None + Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. By default the class level random seed. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : validation loss + cost : time to train and evaluate the model + info : Dict + train_loss : trainings loss + fidelity : used fidelities in this evaluation + """ + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start = time.time() + + if self.lower_bound_train_size > fidelity['dataset_fraction']: + train_data_fraction = self.lower_bound_train_size + logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' + f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' + f'{self.lower_bound_train_size:.8f}') + else: + train_data_fraction = fidelity['dataset_fraction'] + + train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] + + model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) + model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) + + train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) + val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) + cost = time.time() - start + + return {'function_value': float(val_loss), + 'cost': cost, + 'info': {'train_loss': float(train_loss), + 'fidelity': fidelity} + } + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + """ + Trains a XGBoost model with a given configuration on both the train + and validation data set and evaluates the model on the test data set. + + Parameters + ---------- + configuration : Dict, CS.Configuration + Configuration for the XGBoost Model + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + shuffle : bool + If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. + Defaults to ``False``. + rng : np.random.RandomState, int, None, + Random seed for benchmark. 
By default the class level random seed. + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : test loss + cost : time to train and evaluate the model + info : Dict + fidelity : used fidelities in this evaluation + """ + default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value + if fidelity['dataset_fraction'] != default_dataset_fraction: + raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' + f'{default_dataset_fraction}') + + self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) + + if shuffle: + self.shuffle_data(self.rng) + + start = time.time() + + # Impute potential nan values with the feature- + data = np.concatenate((self.x_train, self.x_valid)) + targets = np.concatenate((self.y_train, self.y_valid)) + + model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) + model.fit(X=data, y=targets) + + test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) + cost = time.time() - start + + return {'function_value': float(test_loss), + 'cost': cost, + 'info': {'fidelity': fidelity}} + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all parameters for + the XGBoost Model + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), + CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), + CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), + CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), + CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), + CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), + CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), + CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) + ]) + + return cs + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the XGBoost Benchmark + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), + CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) + ]) + + return fidel_space + + def get_meta_information(self) -> Dict: + """ Returns the meta information for the benchmark """ + return {'name': 'XGBoost', + 'references': 
['@article{probst2019tunability,' + 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' + 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' + 'journal={J. Mach. Learn. Res.},' + 'volume={20},' + 'number={53},' + 'pages={1--32},' + 'year={2019}' + '}'], + 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' + 'xgboost_benchmark_old.py', + 'shape of train data': self.x_train.shape, + 'shape of test data': self.x_test.shape, + 'shape of valid data': self.x_valid.shape, + 'initial random seed': self.rng, + 'task_id': self.task_id + } + + def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, + colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, + n_estimators: int, subsample_per_it: float) \ + -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' + + clf = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", "passthrough", ~self.categorical_data)])), + ('xgb', + xgb.XGBClassifier( + max_depth=max_depth, + learning_rate=eta, + min_child_weight=min_child_weight, + colsample_bytree=colsample_bytree, + colsample_bylevel=colsample_bylevel, + reg_alpha=reg_alpha, + reg_lambda=reg_lambda, + n_estimators=n_estimators, + objective=objective, + n_jobs=self.n_threads, + random_state=self.rng.randint(1, 100000), + num_class=self.num_class, + subsample=subsample_per_it)) + ]) + return clf + + +class XGBoostExtendedBenchmark(XGBoostBenchmark): + """ + Similar to XGBoostBenchmark but enables also the optimization of the used booster. + """ + + def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, + rng: Union[np.random.RandomState, int, None] = None): + super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + cs = XGBoostBenchmark.get_configuration_space(seed) + hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], + default_value='gbtree') + cs.add_hyperparameter(hp_booster) + + # XGBoost with 'gblinear' can not use some + # parameters. Exclude them from the configuration space by introducing a condition. + hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] + + # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
+ conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] + cs.add_conditions(conditions) + return cs + + # noinspection PyMethodOverriding + # pylint: disable=arguments-differ + def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, + min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, + colsample_bylevel: float = None, subsample_per_it: float = None) \ + -> pipeline.Pipeline: + """ Create the scikit-learn (training-)pipeline """ + objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' + + configuration = dict(booster=booster, + max_depth=max_depth, + learning_rate=eta, + min_child_weight=min_child_weight, + colsample_bytree=colsample_bytree, + colsample_bylevel=colsample_bylevel, + reg_alpha=reg_alpha, + reg_lambda=reg_lambda, + n_estimators=n_estimators, + objective=objective, + n_jobs=self.n_threads, + random_state=self.rng.randint(1, 100000), + num_class=self.num_class, + subsample=subsample_per_it) + + configuration = {k: v for k, v in configuration.items() if v is not None} + + clf = pipeline.Pipeline([ + ('preprocess_impute', + ColumnTransformer([ + ("categorical", "passthrough", self.categorical_data), + ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), + ('preprocess_one_hot', + ColumnTransformer([ + ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), + ("continuous", "passthrough", ~self.categorical_data)])), + ('xgb', + xgb.XGBClassifier(**configuration)) + ]) + return clf diff --git a/tests/test_utils.py b/tests/test_utils.py index 885ce606..9bc5ff3b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -64,7 +64,7 @@ def test_rng_serialization(): def test_rng_serialization_xgb(): import json from hpobench.util.container_utils import BenchmarkEncoder, BenchmarkDecoder - from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark + from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark b = XGBoostBenchmark(task_id=167149, rng=0) meta = b.get_meta_information() diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index 7e4c32aa..c3f5e0ff 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -14,7 +14,7 @@ def test_whitebox_without_container_xgb(): - from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark + from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark b = Benchmark(task_id=167199, rng=0) cs = b.get_configuration_space(seed=0) From 9e907e6ef5c5b6f60699dc0f8dffd0a6607a4134 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 8 Jul 2021 19:18:18 +0200 Subject: [PATCH 25/95] Option to load data splits from disk --- hpobench/benchmarks/ml/histgb_benchmark.py | 5 +- .../benchmarks/ml/ml_benchmark_template.py | 53 ++++++++----------- hpobench/benchmarks/ml/rf_benchmark.py | 7 ++- hpobench/benchmarks/ml/svm_benchmark.py | 5 +- hpobench/benchmarks/ml/xgboost_benchmark.py | 7 ++- 5 files changed, 37 insertions(+), 40 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 21ed4ec0..0edcd3fa 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -28,9 +28,10 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1 
+ fidelity_choice: int = 1, + data_path: Union[str, None] = None ): - super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) pass @staticmethod diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index e0ab59bc..24cccbd4 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -1,3 +1,4 @@ +import os import time import openml import numpy as np @@ -44,10 +45,13 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1 + fidelity_choice: int = 1, + data_path: Union[str, None] = None, + global_seed: int = 1 ): self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = check_random_state(self.seed) + self.global_seed = global_seed # used for fixed training-validation splits super(MLBenchmark, self).__init__(rng=seed) self.task_id = task_id @@ -55,7 +59,7 @@ def __init__( self.scorers = dict() for k, v in metrics.items(): self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) - # self.scorers = make_scorer(accuracy_score) + self.data_path = data_path # Data variables self.train_X = None @@ -129,6 +133,19 @@ def load_data_from_openml(self, valid_size=None, verbose=False): The validation set is fixed till this function is called again or explicitly altered """ + if self.data_path is not None and os.path.isdir(self.data_path): + data_path = os.path.join(self.data_path, str(self.task_id)) + data_str = os.path.join(data_path, "{}_{}.parquet.gzip") + required_file_list = [ + ("train", "x"), ("train", "y"), + ("valid", "x"), ("valid", "y"), + ("test", "x"), ("test", "y") + ] + for files in required_file_list: + if not os.path.isfile(data_str.format("train", "x")): + raise FileNotFoundError("{} not found!".format(data_str.format(*files))) + return + # fetches task self.task = openml.tasks.get_task(self.task_id, download_data=False) # fetches dataset @@ -146,7 +163,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): (cont_idx,) = np.where(~categorical_ind) # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset + # train-test split is fixed for a task and its associated dataset (from OpenML) self.train_idx, self.test_idx = self.task.get_train_test_split_indices() train_x = X.iloc[self.train_idx] train_y = y.iloc[self.train_idx] @@ -158,7 +175,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): valid_size = self.valid_size if valid_size is None else valid_size self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=self.rng + shuffle=True, stratify=train_y, random_state=check_random_state(self.global_seed) ) # preprocessor to handle missing values, categorical columns encodings, @@ -350,34 +367,6 @@ def objective_function_test( 'info': info } - # # pylint: disable=arguments-differ - # @AbstractBenchmark.check_parameters - # def objective_function( - # self, - # configuration: Union[CS.Configuration, Dict], - # fidelity: Union[CS.Configuration, Dict, None] = None, - # shuffle: bool = False, - # rng: Union[np.random.RandomState, int, None] = None, - # **kwargs - # ) -> Dict: - # """Function that evaluates a 'config' on 
a 'fidelity' on the validation set - # """ - # return dict() - # - # # pylint: disable=arguments-differ - # @AbstractBenchmark.check_parameters - # def objective_function_test( - # self, - # configuration: Union[CS.Configuration, Dict], - # fidelity: Union[CS.Configuration, Dict, None] = None, - # shuffle: bool = False, - # rng: Union[np.random.RandomState, int, None] = None, - # **kwargs - # ) -> Dict: - # """Function that evaluates a 'config' on a 'fidelity' on the test set - # """ - # return dict() - def get_meta_information(self): """ Returns the meta information for the benchmark """ return {'name': 'Support Vector Machine', diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index b815e1bd..3850399c 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -24,9 +24,12 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1 + fidelity_choice: int = 1, + data_path: Union[str, None] = None ): - super(RandomForestBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + super(RandomForestBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, data_path + ) pass @staticmethod diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 1d0e2d00..190671ca 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -24,9 +24,10 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1 + fidelity_choice: int = 1, + data_path: Union[str, None] = None ): - super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) self.cache_size = 200 @staticmethod diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index b038e4c9..4c93d2ef 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -24,9 +24,12 @@ def __init__( task_id: Union[int, None] = None, seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1 + fidelity_choice: int = 1, + data_path: Union[str, None] = None ): - super(XGBoostBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice) + super(XGBoostBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, data_path + ) pass @staticmethod From f0d4f36ca01b8c9141841b41ec65d1f570e9c6f4 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 12 Jul 2021 15:40:54 +0200 Subject: [PATCH 26/95] Reordering data load to work for different cases --- .../benchmarks/ml/ml_benchmark_template.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index 24cccbd4..dacb64db 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -133,6 +133,16 @@ def load_data_from_openml(self, valid_size=None, verbose=False): The validation set is fixed till this function is called again or explicitly altered """ + # fetches task + self.task = 
openml.tasks.get_task(self.task_id, download_data=False) + self.n_classes = len(self.task.class_labels) + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + print(self.task, '\n') + print(self.dataset, '\n') + + # check if the path to data splits is valid if self.data_path is not None and os.path.isdir(self.data_path): data_path = os.path.join(self.data_path, str(self.task_id)) data_str = os.path.join(data_path, "{}_{}.parquet.gzip") @@ -144,16 +154,9 @@ def load_data_from_openml(self, valid_size=None, verbose=False): for files in required_file_list: if not os.path.isfile(data_str.format("train", "x")): raise FileNotFoundError("{} not found!".format(data_str.format(*files))) + # ignore the remaining data loaders and preprocessors as valid data splits available return - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - # loads full data X, y, categorical_ind, feature_names = self.dataset.get_data( target=self.task.target_name, dataset_format="dataframe" @@ -211,7 +214,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - self.n_classes = len(self.task.class_labels) + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) From dbeae7c07b8ab59883be01cdc591b8810e8ae434 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 14 Jul 2021 20:21:52 +0200 Subject: [PATCH 27/95] Updating source of SVM HP range --- hpobench/benchmarks/ml/svm_benchmark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 190671ca..e6d9e0f7 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -35,13 +35,13 @@ def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) - # from https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.p + # https://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf (Section 3.2) cs.add_hyperparameters([ CS.UniformFloatHyperparameter( - "C", 0.03125, 32768, log=True, default_value=1.0 + "C", 2**-5, 2**15, log=True, default_value=1.0 ), CS.UniformFloatHyperparameter( - "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + "gamma", 2**-15, 2**3, log=True, default_value=0.1 ) ]) return cs From f277a2e7532e7678a4bd7da0d65a1da5707c4798 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 14 Jul 2021 22:52:52 +0200 Subject: [PATCH 28/95] Adding Tabular Benchmark class --- hpobench/benchmarks/ml/__init__.py | 4 ++ hpobench/benchmarks/ml/histgb_benchmark.py | 14 +---- hpobench/benchmarks/ml/rf_benchmark.py | 14 +---- hpobench/benchmarks/ml/svm_benchmark.py | 14 +---- hpobench/benchmarks/ml/tabular_benchmark.py | 70 +++++++++++++++++++++ hpobench/benchmarks/ml/xgboost_benchmark.py | 11 ---- 6 files changed, 77 insertions(+), 50 deletions(-) create mode 100644 hpobench/benchmarks/ml/tabular_benchmark.py diff --git a/hpobench/benchmarks/ml/__init__.py 
b/hpobench/benchmarks/ml/__init__.py index e69de29b..54cf8d51 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -0,0 +1,4 @@ +from .svm_benchmark import SVMBenchmark +from .rf_benchmark import RandomForestBenchmark +from .xgboost_benchmark import XGBoostBenchmark +from .histgb_benchmark import HistGBBenchmark \ No newline at end of file diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 0edcd3fa..a803aeea 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -1,19 +1,7 @@ -import time -import openml import numpy as np -import pandas as pd import ConfigSpace as CS from copy import deepcopy -from typing import Union, Dict - -from sklearn.impute import SimpleImputer -from sklearn.pipeline import make_pipeline -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, make_scorer +from typing import Union # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html from sklearn.experimental import enable_hist_gradient_boosting # noqa diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 3850399c..e6cfa8ba 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -1,19 +1,7 @@ -import time -import openml import numpy as np -import pandas as pd import ConfigSpace as CS -from typing import Union, Dict - -from sklearn.impute import SimpleImputer -from sklearn.pipeline import make_pipeline -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler +from typing import Union from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, make_scorer from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index e6d9e0f7..fc541567 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -1,19 +1,7 @@ -import time -import openml -import numpy as np -import pandas as pd import ConfigSpace as CS -from typing import Union, Dict +from typing import Union, List, Dict from sklearn.svm import SVC -from sklearn.impute import SimpleImputer -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline, Pipeline -from sklearn.metrics import accuracy_score, make_scorer from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py new file mode 100644 index 00000000..782ee254 --- /dev/null +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -0,0 +1,70 @@ +import os +import glom +import numpy as np +import ConfigSpace as CS +import pickle5 as pickle +from typing import Union, List + + +class TabularBenchmark: + 
def __init__(self, table_path: str, seed: Union[int, None]=None): + assert os.path.isfile(table_path), "Not a valid path: {}".format(table_path) + table = self._load_table(table_path) + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.exp_args = table['exp_args'] + self.config_spaces = table['config_spaces'] + self.x_cs = self.get_hyperparameter_space(seed=self.seed) + self.z_cs = self.get_fidelity_space(seed=self.seed) + self.table = table['data'] + self.global_minimums = table['global_min'] + + def _load_table(self, path): + with open(path, "rb") as f: + table = pickle.load(f) + return table + + def get_hyperparameter_space(self, seed=None, original=False): + cs = CS.ConfigurationSpace(seed=seed) + if original: + _cs = self.config_spaces['x'] + _cs = self.config_spaces['x_discrete'] + for hp in _cs.get_hyperparameters(): + cs.add_hyperparameter(hp) + return cs + + def get_fidelity_space(self, seed=None, original=False): + cs = CS.ConfigurationSpace(seed=seed) + if original: + _cs = self.config_spaces['z'] + _cs = self.config_spaces['z_discrete'] + for hp in _cs.get_hyperparameters(): + cs.add_hyperparameter(hp) + return cs + + def sample_hyperparamer(self, n: int = 1) -> Union[CS.Configuration, List]: + return self.x_cs.sample_configuration(n) + + def sample_fidelity(self, n: int = 1) -> Union[CS.Configuration, List]: + return self.z_cs.sample_configuration(n) + + def get_global_min(self, metric: str = "acc"): + """ Retrieves the minimum (1 - metric) for train, validation and test splits + """ + assert metric in self.global_minimums.keys(), \ + "Not a valid metric: {}".format(list(self.global_minimums.keys())) + return self.global_minimums[metric] + + def objective_function(self, config, fidelity): + self.x_cs.check_configuration(config) + self.z_cs.check_configuration(fidelity) + key_path = [] + for name in np.sort(self.x_cs.get_hyperparameter_names()): + key_path.append(config[str(name)]) + for name in np.sort(self.z_cs.get_hyperparameter_names()): + key_path.append(fidelity[str(name)]) + val = glom.glom(self.table, glom.Path(*key_path), default=None) + if val is None: + raise ValueError( + "Invalid config-fidelity or not recorded in table!\n{}\n{}".format(config, fidelity) + ) + return val diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index 4c93d2ef..5efe56ed 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -1,19 +1,8 @@ -import time -import openml import numpy as np -import pandas as pd import ConfigSpace as CS from typing import Union, Dict import xgboost as xgb -from sklearn.impute import SimpleImputer -from sklearn.pipeline import make_pipeline -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, make_scorer from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark From 60d564683175259b074fad355f8fd4dafe40a61a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 15 Jul 2021 21:42:58 +0200 Subject: [PATCH 29/95] Adding TabularBenchmark interface + easy import --- hpobench/benchmarks/ml/__init__.py | 1 + hpobench/benchmarks/ml/tabular_benchmark.py | 63 +++++++++++++++++++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/hpobench/benchmarks/ml/__init__.py 
b/hpobench/benchmarks/ml/__init__.py index 54cf8d51..1e64edc9 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -1,3 +1,4 @@ +from .tabular_benchmark import TabularBenchmark from .svm_benchmark import SVMBenchmark from .rf_benchmark import RandomForestBenchmark from .xgboost_benchmark import XGBoostBenchmark diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index 782ee254..eff31523 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -3,14 +3,17 @@ import numpy as np import ConfigSpace as CS import pickle5 as pickle -from typing import Union, List +from copy import deepcopy +from typing import Union, List, Dict +from hpobench.benchmarks.ml.ml_benchmark_template import metrics class TabularBenchmark: - def __init__(self, table_path: str, seed: Union[int, None]=None): + def __init__(self, table_path: str, seed: Union[int, None] = None): assert os.path.isfile(table_path), "Not a valid path: {}".format(table_path) table = self._load_table(table_path) self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) + self.rng = np.random.RandomState(self.seed) self.exp_args = table['exp_args'] self.config_spaces = table['config_spaces'] self.x_cs = self.get_hyperparameter_space(seed=self.seed) @@ -54,7 +57,14 @@ def get_global_min(self, metric: str = "acc"): "Not a valid metric: {}".format(list(self.global_minimums.keys())) return self.global_minimums[metric] - def objective_function(self, config, fidelity): + def _objective( + self, + config: CS.Configuration, + fidelity: CS.Configuration, + seed: Union[int, None] = None, + metric: Union[str, None] = "acc", + eval: Union[str] = "val" + ) -> Dict: self.x_cs.check_configuration(config) self.z_cs.check_configuration(fidelity) key_path = [] @@ -67,4 +77,49 @@ def objective_function(self, config, fidelity): raise ValueError( "Invalid config-fidelity or not recorded in table!\n{}\n{}".format(config, fidelity) ) - return val + seeds = list(val.keys()) + assert metric in list(metrics.keys()), \ + "metric not found among: {{{}}}".format(", ".join(list(metrics.keys()))) + score_key = "{}_scores".format(eval) + cost_key = "{}_scores".format(eval) + if seed is None: + result = dict(function_value=0.0, cost=0.0, info=dict()) + loss = [] + costs = 0.0 + info = dict() + for seed in seeds: + result = deepcopy(val[seed]) + loss.append(1 - result["info"][score_key][metric]) + costs += result["info"]["model_cost"] + result["info"][cost_key][metric] + info[seed] = result["info"] + loss = np.mean(loss) + result["function_value"] = loss + result["cost"] = costs + result["info"] = info + else: + assert seed in list(val.keys()), \ + "seed not found among: {{{}}}".format(", ".join([str(s) for s in seeds])) + result = deepcopy(val[seed]) + result["function_value"] = 1 - result["info"][score_key][metric] + result["cost"] = result["info"]["model_cost"] + result["info"][cost_key][metric] + return result + + def objective_function( + self, + config: CS.Configuration, + fidelity: CS.Configuration, + seed: Union[int, None] = None, + metric: Union[str, None] = "acc" + ) -> Dict: + result = self._objective(config, fidelity, seed, metric, eval="val") + return result + + def objective_function_test( + self, + config: CS.Configuration, + fidelity: CS.Configuration, + seed: Union[int, None] = None, + metric: Union[str, None] = "acc" + ) -> Dict: + result = self._objective(config, fidelity, seed, metric, eval="test") + return 
result From c4100fd55c5c1da7e57a8b6057e9f318fe107e2e Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 16 Jul 2021 21:39:03 +0200 Subject: [PATCH 30/95] Adding LR space --- hpobench/benchmarks/ml/__init__.py | 3 +- hpobench/benchmarks/ml/histgb_benchmark.py | 5 +- hpobench/benchmarks/ml/lr_benchmark.py | 83 +++++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 5 +- hpobench/benchmarks/ml/svm_benchmark.py | 4 +- hpobench/benchmarks/ml/xgboost_benchmark.py | 5 +- 6 files changed, 92 insertions(+), 13 deletions(-) create mode 100644 hpobench/benchmarks/ml/lr_benchmark.py diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index 1e64edc9..d31a8bed 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -2,4 +2,5 @@ from .svm_benchmark import SVMBenchmark from .rf_benchmark import RandomForestBenchmark from .xgboost_benchmark import XGBoostBenchmark -from .histgb_benchmark import HistGBBenchmark \ No newline at end of file +from .histgb_benchmark import HistGBBenchmark +from .lr_benchmark import LRBenchmark \ No newline at end of file diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index a803aeea..b2bb238f 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -63,7 +63,6 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) if fidelity_choice == 0: # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) @@ -78,7 +77,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) else: # both n_estimators and subsample as fidelities @@ -86,7 +85,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'n_estimators', lower=2, upper=100, default_value=10, log=False ) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) z_cs.add_hyperparameters([ntrees, subsample]) return z_cs diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py new file mode 100644 index 00000000..c9fd4c9b --- /dev/null +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -0,0 +1,83 @@ +import ConfigSpace as CS +from typing import Union, List, Dict + +from sklearn.linear_model import SGDClassifier + +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark + + +class LRBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + data_path: Union[str, None] = None + ): + super(LRBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) + self.cache_size = 200 + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + cs.add_hyperparameters([ + CS.UniformFloatHyperparameter( + "alpha", 10**-5, 10**4, 
log=True, default_value=1.0 + ), + CS.UniformFloatHyperparameter( + "eta0", 2**-10, 1, log=True, default_value=0.3 + ) + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=None): + """Fidelity space available --- specifies the fidelity dimensions + + For SVM, only a single fidelity exists, i.e., subsample fraction. + if fidelity_choice == 0 + uses the entire data (subsample=1), reflecting the black-box setup + else + parameterizes the fraction of data to subsample + + """ + z_cs = CS.ConfigurationSpace(seed=seed) + + if fidelity_choice == 0: + iter = CS.Constant('iter', value=1000) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 1: + iter = CS.UniformIntegerHyperparameter( + 'iter', lower=100, upper=10000, default_value=100, log=False + ) + subsample = CS.Constant('subsample', value=1) + elif fidelity_choice == 2: + iter = CS.Constant('iter', value=1000) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=0.1, upper=1, default_value=1, log=False + ) + else: + iter = CS.UniformIntegerHyperparameter( + 'iter', lower=100, upper=10000, default_value=100, log=False + ) + subsample = CS.UniformFloatHyperparameter( + 'subsample', lower=0.1, upper=1, default_value=1, log=False + ) + z_cs.add_hyperparameters([iter, subsample]) + return z_cs + + def init_model(self, config, fidelity=None, rng=None): + # initializing model + rng = self.rng if rng is None else rng + config = config.get_dictionary() + model = SGDClassifier( + **config, + loss="log", + max_iter=fidelity["iter"], + learning_rate="invscaling", + random_state=rng + ) + return model diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index e6cfa8ba..1972a226 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -56,7 +56,6 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) if fidelity_choice == 0: # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) @@ -71,7 +70,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) else: # both n_estimators and subsample as fidelities @@ -79,7 +78,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'n_estimators', lower=2, upper=100, default_value=10, log=False ) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) z_cs.add_hyperparameters([ntrees, subsample]) return z_cs diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index fc541567..784a91b9 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -50,10 +50,8 @@ def get_fidelity_space(seed=None, fidelity_choice=None): if fidelity_choice == 0: subsample = CS.Constant('subsample', value=1) else: - # TODO: dynamically adapt based on 1/512 and lower_bound_train_size and set log=True - lower = 0.1 subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=lower, upper=1, default_value=0.33, 
log=False + 'subsample', lower=0.1, upper=1, default_value=0.33, log=False ) z_cs.add_hyperparameter(subsample) return z_cs diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index 5efe56ed..f4680854 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -69,7 +69,6 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - subsample_lower_bound = np.max((0.1, (0.1 or self.lower_bound_train_size))) if fidelity_choice == 0: # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) @@ -84,7 +83,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): # only subsample as fidelity ntrees = CS.Constant('n_estimators', value=100) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) else: # both n_estimators and subsample as fidelities @@ -92,7 +91,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'n_estimators', lower=2, upper=100, default_value=10, log=False ) subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=subsample_lower_bound, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1, default_value=1, log=False ) z_cs.add_hyperparameters([ntrees, subsample]) return z_cs From 9c6dcdb4f926bd38be91d6f8cddb5e39e93db808 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 19 Jul 2021 14:55:31 +0200 Subject: [PATCH 31/95] Standardizing fidelity space definitions --- hpobench/benchmarks/ml/histgb_benchmark.py | 42 +++++++++++--------- hpobench/benchmarks/ml/lr_benchmark.py | 43 ++++++++++++--------- hpobench/benchmarks/ml/rf_benchmark.py | 42 +++++++++++--------- hpobench/benchmarks/ml/svm_benchmark.py | 15 ++++--- hpobench/benchmarks/ml/xgboost_benchmark.py | 42 +++++++++++--------- 5 files changed, 104 insertions(+), 80 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index b2bb238f..93b5d908 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -63,30 +63,34 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - if fidelity_choice == 0: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 1: - # only n_estimators as fidelity - ntrees = CS.UniformIntegerHyperparameter( + fidelity1 = dict( + fixed=CS.Constant('n_estimators', value=100), + variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=2, upper=100, default_value=10, log=False ) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 2: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.UniformFloatHyperparameter( + ) + fidelity2 = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=1, log=False ) + ) + if fidelity_choice == 0: + # black-box setting (full fidelity) + ntrees = fidelity1["fixed"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 1: + # gray-box setting (multi-fidelity) - ntrees + ntrees = fidelity1["variable"] + subsample = 
fidelity2["fixed"] + elif fidelity_choice == 2: + # gray-box setting (multi-fidelity) - data subsample + ntrees = fidelity1["fixed"] + subsample = fidelity2["variable"] else: - # both n_estimators and subsample as fidelities - ntrees = CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False - ) - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=1, log=False - ) + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + ntrees = fidelity1["variable"] + subsample = fidelity2["variable"] z_cs.add_hyperparameters([ntrees, subsample]) return z_cs diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py index c9fd4c9b..cdbdf33d 100644 --- a/hpobench/benchmarks/ml/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -16,7 +16,7 @@ def __init__( data_path: Union[str, None] = None ): super(LRBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) - self.cache_size = 200 + self.cache_size = 500 @staticmethod def get_configuration_space(seed=None): @@ -45,27 +45,34 @@ def get_fidelity_space(seed=None, fidelity_choice=None): """ z_cs = CS.ConfigurationSpace(seed=seed) - - if fidelity_choice == 0: - iter = CS.Constant('iter', value=1000) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 1: - iter = CS.UniformIntegerHyperparameter( - 'iter', lower=100, upper=10000, default_value=100, log=False + fidelity1 = dict( + fixed=CS.Constant('iter', value=1000), + variable=CS.UniformIntegerHyperparameter( + 'iter', lower=10, upper=1000, default_value=100, log=False ) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 2: - iter = CS.Constant('iter', value=1000) - subsample = CS.UniformFloatHyperparameter( + ) + fidelity2 = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=1, log=False ) + ) + if fidelity_choice == 0: + # black-box setting (full fidelity) + iter = fidelity1["fixed"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 1: + # gray-box setting (multi-fidelity) - iterations + iter = fidelity1["variable"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 2: + # gray-box setting (multi-fidelity) - data subsample + iter = fidelity1["fixed"] + subsample = fidelity2["variable"] else: - iter = CS.UniformIntegerHyperparameter( - 'iter', lower=100, upper=10000, default_value=100, log=False - ) - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=1, log=False - ) + # gray-box setting (multi-multi-fidelity) - iterations + data subsample + iter = fidelity1["variable"] + subsample = fidelity2["variable"] z_cs.add_hyperparameters([iter, subsample]) return z_cs diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 1972a226..9fa8416e 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -56,30 +56,34 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - if fidelity_choice == 0: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 1: - # only n_estimators as fidelity - ntrees = CS.UniformIntegerHyperparameter( + fidelity1 = dict( + 
fixed=CS.Constant('n_estimators', value=100), + variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=2, upper=100, default_value=10, log=False ) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 2: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.UniformFloatHyperparameter( + ) + fidelity2 = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=1, log=False ) + ) + if fidelity_choice == 0: + # black-box setting (full fidelity) + ntrees = fidelity1["fixed"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 1: + # gray-box setting (multi-fidelity) - ntrees + ntrees = fidelity1["variable"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 2: + # gray-box setting (multi-fidelity) - data subsample + ntrees = fidelity1["fixed"] + subsample = fidelity2["variable"] else: - # both n_estimators and subsample as fidelities - ntrees = CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False - ) - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=1, log=False - ) + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + ntrees = fidelity1["variable"] + subsample = fidelity2["variable"] z_cs.add_hyperparameters([ntrees, subsample]) return z_cs diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 784a91b9..267620b4 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -46,13 +46,18 @@ def get_fidelity_space(seed=None, fidelity_choice=None): """ z_cs = CS.ConfigurationSpace(seed=seed) - - if fidelity_choice == 0: - subsample = CS.Constant('subsample', value=1) - else: - subsample = CS.UniformFloatHyperparameter( + fidelity = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=0.33, log=False ) + ) + if fidelity_choice == 0: + # black-box setting (full fidelity) + subsample = fidelity["fixed"] + else: + # gray-box setting (multi-fidelity) - data subsample + subsample = fidelity["variable"] z_cs.add_hyperparameter(subsample) return z_cs diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index f4680854..dc4a4621 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -69,30 +69,34 @@ def get_fidelity_space(seed=None, fidelity_choice=1): Fidelity space is multi-multi fidelity, all possible fidelities """ z_cs = CS.ConfigurationSpace(seed=seed) - if fidelity_choice == 0: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 1: - # only n_estimators as fidelity - ntrees = CS.UniformIntegerHyperparameter( + fidelity1 = dict( + fixed=CS.Constant('n_estimators', value=100), + variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=2, upper=100, default_value=10, log=False ) - subsample = CS.Constant('subsample', value=1) - elif fidelity_choice == 2: - # only subsample as fidelity - ntrees = CS.Constant('n_estimators', value=100) - subsample = CS.UniformFloatHyperparameter( + ) + fidelity2 = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=1, log=False ) + ) + 
if fidelity_choice == 0: + # black-box setting (full fidelity) + ntrees = fidelity1["fixed"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 1: + # gray-box setting (multi-fidelity) - ntrees + ntrees = fidelity1["variable"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 2: + # gray-box setting (multi-fidelity) - data subsample + ntrees = fidelity1["fixed"] + subsample = fidelity2["variable"] else: - # both n_estimators and subsample as fidelities - ntrees = CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False - ) - subsample = CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=1, log=False - ) + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + ntrees = fidelity1["variable"] + subsample = fidelity2["variable"] z_cs.add_hyperparameters([ntrees, subsample]) return z_cs From 74b6919b30db1fb32887a352fedf4494c84e6cfc Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 19 Jul 2021 18:39:08 +0200 Subject: [PATCH 32/95] Standardizing HPs + Adding NN space --- hpobench/benchmarks/ml/__init__.py | 3 +- hpobench/benchmarks/ml/histgb_benchmark.py | 24 +-- hpobench/benchmarks/ml/nn_benchmark.py | 174 ++++++++++++++++++++ hpobench/benchmarks/ml/rf_benchmark.py | 2 +- hpobench/benchmarks/ml/xgboost_benchmark.py | 4 +- 5 files changed, 185 insertions(+), 22 deletions(-) create mode 100644 hpobench/benchmarks/ml/nn_benchmark.py diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index d31a8bed..37d5cd33 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -3,4 +3,5 @@ from .rf_benchmark import RandomForestBenchmark from .xgboost_benchmark import XGBoostBenchmark from .histgb_benchmark import HistGBBenchmark -from .lr_benchmark import LRBenchmark \ No newline at end of file +from .lr_benchmark import LRBenchmark +from .nn_benchmark import NNBenchmark \ No newline at end of file diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 93b5d908..ba2a4112 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -35,17 +35,12 @@ def get_configuration_space(seed=None): CS.UniformIntegerHyperparameter( 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True ), - #TODO: fix lr value range error in map_to_config() CS.UniformFloatHyperparameter( - 'learning_rate', lower=1e-5, upper=1e-1, default_value=0.1, log=True + 'learning_rate', lower=2**-10, upper=1, default_value=0.3, log=True ), - #TODO: find best way to encode l2 reg. 
since log params cannot have 0 as exact bound - # scales the regularization parameter by using it as a power of 10 - # such that the range of the parameter becomes {0, 1e-7, 1e-6, ..., 1e-1} - # where 10 ** 0 is enforced to be 0 (no regularization) - CS.UniformIntegerHyperparameter( - 'l2_regularization', lower=-7, upper=0, default_value=0, log=False - ) # value of 1 indicates 0 regularization + CS.UniformFloatHyperparameter( + 'l2_regularization', lower=2**-10, upper=2**10, default_value=0.1, log=True + ) ]) return cs @@ -66,7 +61,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False + 'n_estimators', lower=1, upper=128, default_value=10, log=False ) ) fidelity2 = dict( @@ -98,15 +93,8 @@ def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng - config = deepcopy(config).get_dictionary() - l2 = config.pop("l2_regularization") - l2 = 0 if l2 == 1 else 10 ** l2 - # TODO: decide on encoding of learning rate - #TODO: allow non-encoded categoricals? - #TODO: early stopping set to False? model = HistGradientBoostingClassifier( - **config, - l2_regularization=l2, + **config.get_dictionary(), max_iter=fidelity['n_estimators'], # a fidelity being used during initialization early_stopping=False, random_state=rng diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py new file mode 100644 index 00000000..6a2deb73 --- /dev/null +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -0,0 +1,174 @@ +import numpy as np +import ConfigSpace as CS +from copy import deepcopy +from typing import Union, Tuple +from sklearn.neural_network import MLPClassifier + +from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark + + +class NNBenchmark(MLBenchmark): + def __init__( + self, + task_id: Union[int, None] = None, + seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + fidelity_choice: int = 1, + data_path: Union[str, None] = None + ): + super(NNBenchmark, self).__init__( + task_id, seed, valid_size, fidelity_choice, data_path + ) + # fixing layers in the architecture + self.n_layers = 5 + pass + + @staticmethod + def get_configuration_space(seed=None): + """Parameter space to be optimized --- contains the hyperparameters + """ + cs = CS.ConfigurationSpace(seed=seed) + + cs.add_hyperparameters([ + CS.CategoricalHyperparameter( + 'shape', default_value="funnel", + choices=["funnel", "long_funnel", "rhombus", "diamond", "hexagon", + "brick", "triangle", "stairs"] + ), + CS.OrdinalHyperparameter( + 'max_hidden_dim', sequence=[64, 128, 256, 512, 1024], default_value=128 + ), + CS.UniformIntegerHyperparameter( + 'batch_size', lower=4, upper=128, default_value=16, log=True + ), + CS.UniformFloatHyperparameter( + 'learning_rate_init', lower=2**-10, upper=1, default_value=0.3, log=True + ), + CS.UniformFloatHyperparameter( + 'momentum', lower=0, upper=1, default_value=0.9, log=False + ), + ]) + return cs + + @staticmethod + def get_fidelity_space(seed=None, fidelity_choice=1): + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the 
number of epochs (max_iter) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + z_cs = CS.ConfigurationSpace(seed=seed) + fidelity1 = dict( + fixed=CS.Constant('iter', value=100), + variable=CS.UniformIntegerHyperparameter( + 'iter', lower=3, upper=30, default_value=50, log=False + ) + ) + fidelity2 = dict( + fixed=CS.Constant('subsample', value=1), + variable=CS.UniformFloatHyperparameter( + 'subsample', lower=0.1, upper=1, default_value=1, log=False + ) + ) + if fidelity_choice == 0: + # black-box setting (full fidelity) + iter = fidelity1["fixed"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 1: + # gray-box setting (multi-fidelity) - epochs/iteration + iter = fidelity1["variable"] + subsample = fidelity2["fixed"] + elif fidelity_choice == 2: + # gray-box setting (multi-fidelity) - data subsample + iter = fidelity1["fixed"] + subsample = fidelity2["variable"] + else: + # gray-box setting (multi-multi-fidelity) - epochs + data subsample + iter = fidelity1["variable"] + subsample = fidelity2["variable"] + z_cs.add_hyperparameters([iter, subsample]) + return z_cs + + def _get_architecture(self, shape: str, max_hidden_size: int) -> Tuple: + # https://mikkokotila.github.io/slate/#shapes + arch = [] + if shape == "funnel": + for i in range(self.n_layers): + arch.append(max_hidden_size) + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + elif shape == "long_funnel": + brick_arch_len = np.ceil(self.n_layers / 2).astype(int) + for i in range(brick_arch_len): + arch.append(max_hidden_size) + for i in range(self.n_layers - brick_arch_len): + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + arch.append(max_hidden_size) + elif shape == "rhombus": + arch.append(max_hidden_size) + rhombus_len = self.n_layers // 2 + _arch = [] + for i in range(rhombus_len): + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + _arch.append(max_hidden_size) + arch = np.flip(_arch).tolist() + arch + _arch + elif shape == "diamond": + # open rhombus + arch.append(max_hidden_size) + rhombus_len = self.n_layers // 2 + second_max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + _arch = [] + for i in range(rhombus_len): + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + _arch.append(max_hidden_size) + arch = [second_max_hidden_size] * rhombus_len + arch + _arch + elif shape == "hexagon": + if self.n_layers % 2 == 0: + arch.append(max_hidden_size) + half_len = np.ceil(self.n_layers / 2).astype(int) + _arch = [] + for i in range(half_len): + _arch.append(max_hidden_size) + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + arch = _arch[::-1] + arch + _arch[:-1] + elif shape == "triangle": + # reverse funnel + for i in range(self.n_layers): + arch.append(max_hidden_size) + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + arch = arch[::-1] + elif shape == "stairs": + for i in range(1, self.n_layers+1): + arch.append(max_hidden_size) + if i % 2 == 0 or self.n_layers < 4: + max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) + else: + # default to brick design + arch = tuple([max_hidden_size] * self.n_layers) + arch = tuple(arch) + return arch + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity + """ + rng = self.rng if rng is None else rng + config = deepcopy(config.get_dictionary()) 
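+        # Note: 'shape' and 'max_hidden_dim' act as meta-hyperparameters; they are popped
+        # from the config dict below and mapped to an explicit hidden_layer_sizes tuple via
+        # _get_architecture(), so only the remaining keys are passed through to MLPClassifier.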
+ shape = config["shape"] + max_hidden_dim = config["max_hidden_dim"] + config.pop("shape") + config.pop("max_hidden_dim") + model = MLPClassifier( + **config, + hidden_layer_sizes=self._get_architecture(shape, max_hidden_dim), + activation="relu", + solver="sgd", + learning_rate="invscaling", + max_iter=fidelity['iter'], # a fidelity being used during initialization + random_state=rng + ) + return model diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 9fa8416e..a57b7726 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -59,7 +59,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False + 'n_estimators', lower=1, upper=128, default_value=10, log=False ) ) fidelity2 = dict( diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index dc4a4621..4c77a92e 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -32,7 +32,7 @@ def get_configuration_space(seed=None): 'eta', lower=2**-10, upper=1., default_value=0.3, log=True ), # learning rate CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, upper=15, default_value=6, log=False + 'max_depth', lower=1, upper=15, default_value=2, log=False ), CS.UniformFloatHyperparameter( 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True @@ -72,7 +72,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=2, upper=100, default_value=10, log=False + 'n_estimators', lower=1, upper=128, default_value=10, log=False ) ) fidelity2 = dict( From 785055eccb6480586f512f6d2ace1625ba32591e Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 19 Jul 2021 18:40:24 +0200 Subject: [PATCH 33/95] Small placeholder for testing --- hpobench/benchmarks/ml/nn_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 6a2deb73..0063cdc9 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -67,7 +67,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('iter', value=100), variable=CS.UniformIntegerHyperparameter( - 'iter', lower=3, upper=30, default_value=50, log=False + 'iter', lower=3, upper=30, default_value=30, log=False ) ) fidelity2 = dict( From 0159a35ff4e2532bb9011326927eba77009b160a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 20 Jul 2021 14:34:33 +0200 Subject: [PATCH 34/95] Updating NN HP space + Helper function for TabularBenchmark --- hpobench/benchmarks/ml/nn_benchmark.py | 13 +++++++------ hpobench/benchmarks/ml/tabular_benchmark.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 0063cdc9..89aa115f 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -38,15 +38,15 @@ def get_configuration_space(seed=None): CS.OrdinalHyperparameter( 'max_hidden_dim', sequence=[64, 128, 256, 512, 1024], default_value=128 ), + CS.UniformFloatHyperparameter( + 'alpha', lower=10**-5, upper=10**4, 
default_value=10**-3, log=True + ), CS.UniformIntegerHyperparameter( - 'batch_size', lower=4, upper=128, default_value=16, log=True + 'batch_size', lower=4, upper=256, default_value=32, log=True ), CS.UniformFloatHyperparameter( 'learning_rate_init', lower=2**-10, upper=1, default_value=0.3, log=True - ), - CS.UniformFloatHyperparameter( - 'momentum', lower=0, upper=1, default_value=0.9, log=False - ), + ) ]) return cs @@ -67,7 +67,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('iter', value=100), variable=CS.UniformIntegerHyperparameter( - 'iter', lower=3, upper=30, default_value=30, log=False + 'iter', lower=3, upper=150, default_value=30, log=False ) ) fidelity2 = dict( @@ -168,6 +168,7 @@ def init_model(self, config, fidelity=None, rng=None): activation="relu", solver="sgd", learning_rate="invscaling", + momentum=0.9, max_iter=fidelity['iter'], # a fidelity being used during initialization random_state=rng ) diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index eff31523..ff5fcd8e 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -26,6 +26,18 @@ def _load_table(self, path): table = pickle.load(f) return table + def _get_model_name(self): + return self.exp_args["space"] + + def _total_number_of_configurations(self, space: str="hyperparameters") -> int: + """ Returns the number of unique configurations in the parameter/fidelity space + """ + count = 1 + cs = self.x_cs if space == "hyperparameters" else self.z_cs + for hp in cs.get_hyperparameters(): + count *= len(hp.sequence) + return count + def get_hyperparameter_space(self, seed=None, original=False): cs = CS.ConfigurationSpace(seed=seed) if original: @@ -57,6 +69,12 @@ def get_global_min(self, metric: str = "acc"): "Not a valid metric: {}".format(list(self.global_minimums.keys())) return self.global_minimums[metric] + def get_max_fidelity(self) -> Dict: + max_fidelity = dict() + for hp in self.z_cs.get_hyperparameters(): + max_fidelity[hp.name] = np.sort(hp.sequence)[-1] + return max_fidelity + def _objective( self, config: CS.Configuration, From e9e097af3d094495c81abbe94c2984597e63273a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 20 Jul 2021 20:11:49 +0200 Subject: [PATCH 35/95] Adding fidelity range retrieval utility to TabularBenchmark --- hpobench/benchmarks/ml/tabular_benchmark.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index ff5fcd8e..e0be2fc0 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -75,6 +75,13 @@ def get_max_fidelity(self) -> Dict: max_fidelity[hp.name] = np.sort(hp.sequence)[-1] return max_fidelity + def get_fidelity_range(self): + fidelities = [] + for hp in self.z_cs.get_hyperparameters(): + if not isinstance(hp, CS.Constant) and len(hp.sequence) > 1: + fidelities.append((hp.name, hp.sequence[0], hp.sequence[-1])) + return fidelities + def _objective( self, config: CS.Configuration, From 47971098ea3a9221d6839ff031be1970a3318933 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Wed, 21 Jul 2021 16:14:12 +0200 Subject: [PATCH 36/95] Enforcing subsample lower bound check inside objective --- hpobench/benchmarks/ml/ml_benchmark_template.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py 
b/hpobench/benchmarks/ml/ml_benchmark_template.py index dacb64db..cdde98ad 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -177,8 +177,8 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # validation set is fixed till this function is called again or explicitly altered valid_size = self.valid_size if valid_size is None else valid_size self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, - shuffle=True, stratify=train_y, random_state=check_random_state(self.global_seed) + train_x, train_y, test_size=valid_size, shuffle=True, stratify=train_y, + random_state=check_random_state(self.global_seed) # uses global seed for fixed splits ) # preprocessor to handle missing values, categorical columns encodings, @@ -200,7 +200,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): ]) ) if verbose: - print("Shape of data pre-preprocessing: {}".format(train_X.shape)) + print("Shape of data pre-preprocessing: {}".format(self.train_X.shape)) # preprocessor fit only on the training set self.train_X = self.preprocessor.fit_transform(self.train_X) @@ -219,7 +219,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) if verbose: - print("Shape of data post-preprocessing: {}".format(train_X.shape), "\n") + print("Shape of data post-preprocessing: {}".format(self.train_X.shape), "\n") if verbose: print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) @@ -261,9 +261,13 @@ def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): # subsample here: # application of the other fidelity to the dataset that the model interfaces + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) train_idx = self.rng.choice( np.arange(len(train_X)), size=int( - fidelity['subsample'] * len(train_X) + subsample * len(train_X) ) ) # fitting the model with subsampled data From dbb73278d067a163ef7c032c0c85095ec2091e8a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 22 Jul 2021 00:55:00 +0200 Subject: [PATCH 37/95] Bug fix + adding precicion as metric --- .../benchmarks/ml/ml_benchmark_template.py | 43 +++++++++---------- hpobench/benchmarks/ml/svm_benchmark.py | 2 +- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index cdde98ad..bc169077 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -13,27 +13,23 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split -from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, \ - top_k_accuracy_score, balanced_accuracy_score +from sklearn.metrics import make_scorer, accuracy_score, balanced_accuracy_score, \ + precision_score, f1_score from hpobench.abstract_benchmark import AbstractBenchmark metrics = dict( - #TODO: decide on metrics generalized for different datasets acc=accuracy_score, bal_acc=balanced_accuracy_score, f1=f1_score, - # roc=roc_auc_score, - # 
topk=top_k_accuracy_score + precision=precision_score, ) metrics_kwargs = dict( - #TODO: decide on metric parameters acc=dict(), bal_acc=dict(), - f1=dict(average="weighted"), - # roc=dict(average="weighted"), - # topk=dict() + f1=dict(average="macro", zero_division=0), + precision=dict(average="macro", zero_division=0), ) @@ -174,11 +170,11 @@ def load_data_from_openml(self, valid_size=None, verbose=False): self.test_y = y.iloc[self.test_idx] # splitting training into training and validation - # validation set is fixed till this function is called again or explicitly altered + # validation set is fixed as per the global seed independent of the benchmark seed valid_size = self.valid_size if valid_size is None else valid_size self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( train_x, train_y, test_size=valid_size, shuffle=True, stratify=train_y, - random_state=check_random_state(self.global_seed) # uses global seed for fixed splits + random_state=check_random_state(self.global_seed) ) # preprocessor to handle missing values, categorical columns encodings, @@ -214,7 +210,6 @@ def load_data_from_openml(self, valid_size=None, verbose=False): # Similar to (https://arxiv.org/pdf/1605.07079.pdf) # use 10 times the number of classes as lower bound for the dataset fraction - self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) @@ -228,7 +223,7 @@ def load_data_from_openml(self, valid_size=None, verbose=False): print("\nData loading complete!\n") return - def shuffle_data_idx(self, train_id=None, ng=None): + def shuffle_data_idx(self, train_idx=None, rng=None): rng = self.rng if rng is None else rng train_idx = self.train_idx if train_idx is None else train_idx rng.shuffle(train_idx) @@ -311,11 +306,12 @@ def objective_function( _start = time.time() test_scores[k] = v(model, self.test_X, self.test_y) test_score_cost[k] = time.time() - _start - val_loss = 1 - test_scores["acc"] + test_loss = 1 - test_scores["acc"] info = { 'train_loss': train_loss, 'val_loss': val_loss, + 'test_loss': test_loss, 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, @@ -330,7 +326,7 @@ def objective_function( return { 'function_value': info['val_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['val_costs']['acc'], + 'cost': model_fit_time + info['val_costs']['acc'], 'info': info } @@ -370,16 +366,17 @@ def objective_function_test( return { 'function_value': info['test_loss'], - 'cost': model_fit_time + info['train_costs']['acc'] + info['test_costs']['acc'], + 'cost': model_fit_time + info['test_costs']['acc'], 'info': info } def get_meta_information(self): """ Returns the meta information for the benchmark """ - return {'name': 'Support Vector Machine', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } + return { + 'name': 'Support Vector Machine', + 'shape of train data': self.train_X.shape, + 'shape of test data': self.test_X.shape, + 'shape of valid data': self.valid_X.shape, + 'initial random seed': self.seed, + 'task_id': self.task_id + } diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 267620b4..61e9840d 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -1,5 +1,5 @@ import ConfigSpace 
as CS -from typing import Union, List, Dict +from typing import Union from sklearn.svm import SVC From 7d5ca578bafdc2f97c0d27550110e3d2474c39df Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 22 Jul 2021 21:25:02 +0200 Subject: [PATCH 38/95] Fixing param spaces and model building for LR, SVM --- hpobench/benchmarks/ml/lr_benchmark.py | 21 +++++++++++---------- hpobench/benchmarks/ml/svm_benchmark.py | 8 ++++---- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py index cdbdf33d..de791aa6 100644 --- a/hpobench/benchmarks/ml/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -25,10 +25,10 @@ def get_configuration_space(seed=None): cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ CS.UniformFloatHyperparameter( - "alpha", 10**-5, 10**4, log=True, default_value=1.0 + "alpha", 1e-5, 1, log=True, default_value=1e-3 ), CS.UniformFloatHyperparameter( - "eta0", 2**-10, 1, log=True, default_value=0.3 + "eta0", 1e-5, 1, log=True, default_value=1e-2 ) ]) return cs @@ -48,13 +48,13 @@ def get_fidelity_space(seed=None, fidelity_choice=None): fidelity1 = dict( fixed=CS.Constant('iter', value=1000), variable=CS.UniformIntegerHyperparameter( - 'iter', lower=10, upper=1000, default_value=100, log=False + 'iter', lower=10, upper=1000, default_value=1000, log=False ) ) fidelity2 = dict( - fixed=CS.Constant('subsample', value=1), + fixed=CS.Constant('subsample', value=1.0), variable=CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=1, log=False + 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) if fidelity_choice == 0: @@ -79,12 +79,13 @@ def get_fidelity_space(seed=None, fidelity_choice=None): def init_model(self, config, fidelity=None, rng=None): # initializing model rng = self.rng if rng is None else rng - config = config.get_dictionary() + # https://scikit-learn.org/stable/modules/sgd.html model = SGDClassifier( - **config, - loss="log", + **config.get_dictionary(), + loss="log", # performs Logistic Regression max_iter=fidelity["iter"], - learning_rate="invscaling", - random_state=rng + learning_rate="adaptive", + tol=None, + random_state=rng, ) return model diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 61e9840d..fe1afb66 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -23,13 +23,13 @@ def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) - # https://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf (Section 3.2) + # https://jmlr.org/papers/volume20/18-444/18-444.pdf (Table 1) cs.add_hyperparameters([ CS.UniformFloatHyperparameter( - "C", 2**-5, 2**15, log=True, default_value=1.0 + "C", 2**-10, 2**10, log=True, default_value=1.0 ), CS.UniformFloatHyperparameter( - "gamma", 2**-15, 2**3, log=True, default_value=0.1 + "gamma", 2**-10, 2**10, log=True, default_value=0.1 ) ]) return cs @@ -49,7 +49,7 @@ def get_fidelity_space(seed=None, fidelity_choice=None): fidelity = dict( fixed=CS.Constant('subsample', value=1), variable=CS.UniformFloatHyperparameter( - 'subsample', lower=0.1, upper=1, default_value=0.33, log=False + 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) if fidelity_choice == 0: From a6d94bbddb14560840b219d44999f97ea6e2e67b Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 26 Jul 2021 21:58:01 +0200 
Subject: [PATCH 39/95] TabularBenchmark edit to read compressed files and query a dataframe --- .../benchmarks/ml/ml_benchmark_template.py | 3 + hpobench/benchmarks/ml/tabular_benchmark.py | 119 +++++++++++------- 2 files changed, 75 insertions(+), 47 deletions(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index bc169077..b8a66790 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -353,10 +353,13 @@ def objective_function_test( info = { 'train_loss': train_loss, + 'val_loss': None, 'test_loss': test_loss, 'model_cost': model_fit_time, 'train_scores': train_scores, 'train_costs': train_score_cost, + 'val_scores': dict(), + 'val_costs': dict(), 'test_scores': test_scores, 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index e0be2fc0..528f2eef 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -1,6 +1,9 @@ import os import glom +import json +import pickle import numpy as np +import pandas as pd import ConfigSpace as CS import pickle5 as pickle from copy import deepcopy @@ -9,25 +12,38 @@ class TabularBenchmark: - def __init__(self, table_path: str, seed: Union[int, None] = None): - assert os.path.isfile(table_path), "Not a valid path: {}".format(table_path) - table = self._load_table(table_path) + def __init__(self, path: str, model: str, task_id: int, seed: Union[int, None] = None): + assert os.path.isdir(path), "Not a valid path: {}".format(path) + self.data_path = os.path.join(path, "{}_{}_data.parquet.gzip".format(model, task_id)) + assert os.path.isfile(self.data_path) + self.config_path = os.path.join(path, "{}_{}_configs.pkl".format(model, task_id)) + assert os.path.isfile(self.config_path) + self.exp_args_path = os.path.join(path, "{}_{}.json".format(model, task_id)) + assert os.path.isfile(self.exp_args_path) + self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = np.random.RandomState(self.seed) - self.exp_args = table['exp_args'] - self.config_spaces = table['config_spaces'] + self.table = self._load_parquet(self.data_path) + self.exp_args = self._load_json(self.exp_args_path) + self.config_spaces = self._load_pickle(self.config_path) + self.x_cs = self.get_hyperparameter_space(seed=self.seed) self.z_cs = self.get_fidelity_space(seed=self.seed) - self.table = table['data'] - self.global_minimums = table['global_min'] + self.global_minimums = self.exp_args["global_min"] - def _load_table(self, path): + def _load_pickle(self, path): with open(path, "rb") as f: - table = pickle.load(f) - return table + data = pickle.load(f) + return data - def _get_model_name(self): - return self.exp_args["space"] + def _load_parquet(self, path): + data = pd.read_parquet(path) + return data + + def _load_json(self, path): + with open(path, "r") as f: + data = json.load(f) + return data def _total_number_of_configurations(self, space: str="hyperparameters") -> int: """ Returns the number of unique configurations in the parameter/fidelity space @@ -38,6 +54,9 @@ def _total_number_of_configurations(self, space: str="hyperparameters") -> int: count *= len(hp.sequence) return count + def _seeds_used(self): + return self.table.seed.unique().tolist() + def get_hyperparameter_space(self, seed=None, original=False): cs = 
CS.ConfigurationSpace(seed=seed) if original: @@ -82,51 +101,57 @@ def get_fidelity_range(self): fidelities.append((hp.name, hp.sequence[0], hp.sequence[-1])) return fidelities + def _search_dataframe(self, row_dict, df): + # https://stackoverflow.com/a/46165056/8363967 + mask = np.array([True] * df.shape[0]) + for i, param in enumerate(df.drop("result", axis=1).columns): + mask *= df[param].values == row_dict[param] + idx = np.where(mask) + if len(idx) != 1: + return None + idx = idx[0][0] + result = df.iloc[idx]["result"] + return result + def _objective( self, config: CS.Configuration, fidelity: CS.Configuration, seed: Union[int, None] = None, metric: Union[str, None] = "acc", - eval: Union[str] = "val" + evaluation: Union[str] = "" ) -> Dict: self.x_cs.check_configuration(config) self.z_cs.check_configuration(fidelity) - key_path = [] - for name in np.sort(self.x_cs.get_hyperparameter_names()): - key_path.append(config[str(name)]) - for name in np.sort(self.z_cs.get_hyperparameter_names()): - key_path.append(fidelity[str(name)]) - val = glom.glom(self.table, glom.Path(*key_path), default=None) - if val is None: - raise ValueError( - "Invalid config-fidelity or not recorded in table!\n{}\n{}".format(config, fidelity) - ) - seeds = list(val.keys()) assert metric in list(metrics.keys()), \ "metric not found among: {{{}}}".format(", ".join(list(metrics.keys()))) - score_key = "{}_scores".format(eval) - cost_key = "{}_scores".format(eval) - if seed is None: - result = dict(function_value=0.0, cost=0.0, info=dict()) - loss = [] - costs = 0.0 - info = dict() - for seed in seeds: - result = deepcopy(val[seed]) - loss.append(1 - result["info"][score_key][metric]) - costs += result["info"]["model_cost"] + result["info"][cost_key][metric] - info[seed] = result["info"] - loss = np.mean(loss) - result["function_value"] = loss - result["cost"] = costs - result["info"] = info + score_key = "{}_scores".format(evaluation) + cost_key = "{}_scores".format(evaluation) + + key_path = dict() + for name in np.sort(self.x_cs.get_hyperparameter_names()): + key_path[str(name)] = config[str(name)] + for name in np.sort(self.z_cs.get_hyperparameter_names()): + key_path[str(name)] = fidelity[str(name)] + + if seed is not None: + assert seed in self._seeds_used() + seeds = [seed] else: - assert seed in list(val.keys()), \ - "seed not found among: {{{}}}".format(", ".join([str(s) for s in seeds])) - result = deepcopy(val[seed]) - result["function_value"] = 1 - result["info"][score_key][metric] - result["cost"] = result["info"]["model_cost"] + result["info"][cost_key][metric] + seeds = self._seeds_used() + + loss = [] + costs = 0.0 + info = dict() + for seed in seeds: + key_path["seed"] = seed + res = self._search_dataframe(key_path, self.table) + loss.append(1 - res["info"][score_key][metric]) + costs += res["info"]["model_cost"] + res["info"][cost_key][metric] + info[seed] = res["info"] + key_path.pop("seed") + loss = np.mean(loss) + result = dict(function_value=loss, cost=costs, info=info) return result def objective_function( @@ -136,7 +161,7 @@ def objective_function( seed: Union[int, None] = None, metric: Union[str, None] = "acc" ) -> Dict: - result = self._objective(config, fidelity, seed, metric, eval="val") + result = self._objective(config, fidelity, seed, metric, evaluation="val") return result def objective_function_test( @@ -146,5 +171,5 @@ def objective_function_test( seed: Union[int, None] = None, metric: Union[str, None] = "acc" ) -> Dict: - result = self._objective(config, fidelity, seed, metric, 
eval="test") + result = self._objective(config, fidelity, seed, metric, evaluation="test") return result From 93b69081e38dbb9ddae4a1a9e2505ea5ca8c6bbf Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 27 Jul 2021 02:41:16 +0200 Subject: [PATCH 40/95] Not evaluating training set to save time --- hpobench/benchmarks/ml/ml_benchmark_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index b8a66790..d5cd6229 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -274,7 +274,7 @@ def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): score_cost = dict() for k, v in self.scorers.items(): _start = time.time() - scores[k] = v(model, train_X, train_y) + scores[k] = 0 # v(model, train_X, train_y) score_cost[k] = time.time() - _start train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost From 8164eb0e21748d80532f2c74967536f5a36870fe Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Tue, 27 Jul 2021 21:26:36 +0200 Subject: [PATCH 41/95] Fidelity change for trees + NN space change --- hpobench/benchmarks/ml/histgb_benchmark.py | 2 +- .../benchmarks/ml/ml_benchmark_template.py | 15 +-- hpobench/benchmarks/ml/nn_benchmark.py | 95 +++---------------- hpobench/benchmarks/ml/rf_benchmark.py | 15 +-- hpobench/benchmarks/ml/xgboost_benchmark.py | 4 +- 5 files changed, 35 insertions(+), 96 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index ba2a4112..1860ff48 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -61,7 +61,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=1, upper=128, default_value=10, log=False + 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) fidelity2 = dict( diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/benchmarks/ml/ml_benchmark_template.py index d5cd6229..83b39957 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/benchmarks/ml/ml_benchmark_template.py @@ -234,7 +234,7 @@ def init_model(self, config, fidelity=None, rng=None): """ raise NotImplementedError() - def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): + def _train_objective(self, config, fidelity, shuffle, rng, evaluation="valid"): # initializing model model = self.init_model(config, fidelity, rng) @@ -273,9 +273,12 @@ def _train_objective(self, config, fidelity, shuffle, rng, eval="valid"): scores = dict() score_cost = dict() for k, v in self.scorers.items(): - _start = time.time() - scores[k] = 0 # v(model, train_X, train_y) - score_cost[k] = time.time() - _start + scores[k] = 0.0 + score_cost[k] = 0.0 + if evaluation == "test": + _start = time.time() + scores[k] = v(model, train_X, train_y) + score_cost[k] = time.time() - _start train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost @@ -290,7 +293,7 @@ def objective_function( """Function that evaluates a 'config' on a 'fidelity' on the validation set """ model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng + configuration, fidelity, shuffle, rng, evaluation="val" ) val_scores 
= dict() val_score_cost = dict() @@ -341,7 +344,7 @@ def objective_function_test( """Function that evaluates a 'config' on a 'fidelity' on the test set """ model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng, eval="test" + configuration, fidelity, shuffle, rng, evaluation="test" ) test_scores = dict() test_score_cost = dict() diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 89aa115f..0bec70e4 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -19,8 +19,6 @@ def __init__( super(NNBenchmark, self).__init__( task_id, seed, valid_size, fidelity_choice, data_path ) - # fixing layers in the architecture - self.n_layers = 5 pass @staticmethod @@ -30,22 +28,16 @@ def get_configuration_space(seed=None): cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ - CS.CategoricalHyperparameter( - 'shape', default_value="funnel", - choices=["funnel", "long_funnel", "rhombus", "diamond", "hexagon", - "brick", "triangle", "stairs"] - ), - CS.OrdinalHyperparameter( - 'max_hidden_dim', sequence=[64, 128, 256, 512, 1024], default_value=128 - ), - CS.UniformFloatHyperparameter( - 'alpha', lower=10**-5, upper=10**4, default_value=10**-3, log=True - ), + CS.UniformIntegerHyperparameter('depth', default_value=3, lower=1, upper=3), + CS.UniformIntegerHyperparameter('width', default_value=64, lower=16, upper=256), CS.UniformIntegerHyperparameter( 'batch_size', lower=4, upper=256, default_value=32, log=True ), CS.UniformFloatHyperparameter( - 'learning_rate_init', lower=2**-10, upper=1, default_value=0.3, log=True + 'alpha', lower=10**-8, upper=1, default_value=10**-3, log=True + ), + CS.UniformFloatHyperparameter( + 'learning_rate_init', lower=10**-5, upper=1, default_value=10**-3, log=True ) ]) return cs @@ -67,7 +59,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('iter', value=100), variable=CS.UniformIntegerHyperparameter( - 'iter', lower=3, upper=150, default_value=30, log=False + 'iter', lower=3, upper=243, default_value=243, log=False ) ) fidelity2 = dict( @@ -95,80 +87,21 @@ def get_fidelity_space(seed=None, fidelity_choice=1): z_cs.add_hyperparameters([iter, subsample]) return z_cs - def _get_architecture(self, shape: str, max_hidden_size: int) -> Tuple: - # https://mikkokotila.github.io/slate/#shapes - arch = [] - if shape == "funnel": - for i in range(self.n_layers): - arch.append(max_hidden_size) - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - elif shape == "long_funnel": - brick_arch_len = np.ceil(self.n_layers / 2).astype(int) - for i in range(brick_arch_len): - arch.append(max_hidden_size) - for i in range(self.n_layers - brick_arch_len): - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - arch.append(max_hidden_size) - elif shape == "rhombus": - arch.append(max_hidden_size) - rhombus_len = self.n_layers // 2 - _arch = [] - for i in range(rhombus_len): - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - _arch.append(max_hidden_size) - arch = np.flip(_arch).tolist() + arch + _arch - elif shape == "diamond": - # open rhombus - arch.append(max_hidden_size) - rhombus_len = self.n_layers // 2 - second_max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - _arch = [] - for i in range(rhombus_len): - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - _arch.append(max_hidden_size) - arch = [second_max_hidden_size] * rhombus_len + 
arch + _arch - elif shape == "hexagon": - if self.n_layers % 2 == 0: - arch.append(max_hidden_size) - half_len = np.ceil(self.n_layers / 2).astype(int) - _arch = [] - for i in range(half_len): - _arch.append(max_hidden_size) - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - arch = _arch[::-1] + arch + _arch[:-1] - elif shape == "triangle": - # reverse funnel - for i in range(self.n_layers): - arch.append(max_hidden_size) - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - arch = arch[::-1] - elif shape == "stairs": - for i in range(1, self.n_layers+1): - arch.append(max_hidden_size) - if i % 2 == 0 or self.n_layers < 4: - max_hidden_size = np.ceil(max_hidden_size / 2).astype(int) - else: - # default to brick design - arch = tuple([max_hidden_size] * self.n_layers) - arch = tuple(arch) - return arch - def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng config = deepcopy(config.get_dictionary()) - shape = config["shape"] - max_hidden_dim = config["max_hidden_dim"] - config.pop("shape") - config.pop("max_hidden_dim") + depth = config["depth"] + width = config["width"] + config.pop("depth") + config.pop("width") + hidden_layers = [width] * depth model = MLPClassifier( **config, - hidden_layer_sizes=self._get_architecture(shape, max_hidden_dim), + hidden_layer_sizes=hidden_layers, activation="relu", - solver="sgd", - learning_rate="invscaling", - momentum=0.9, + solver="adam", max_iter=fidelity['iter'], # a fidelity being used during initialization random_state=rng ) diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index a57b7726..0cebcdf5 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -28,16 +28,19 @@ def get_configuration_space(seed=None): cs.add_hyperparameters([ CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, upper=15, default_value=2, log=False + 'max_depth', lower=1, upper=30, default_value=10, log=False ), - CS.UniformIntegerHyperparameter( - 'min_samples_split', lower=2, upper=128, default_value=2, log=True + CS.UniformFloatHyperparameter( + 'min_samples_split', lower=0.05, upper=0.9, default_value=0.9, log=True ), + # CS.UniformIntegerHyperparameter( + # 'min_samples_split', lower=2, upper=20, default_value=2, log=False + # ), CS.UniformFloatHyperparameter( - 'max_features', lower=0.1, upper=0.9, default_value=0.5, log=False + 'max_features', lower=0.1, upper=1.0, default_value=0.5, log=False ), CS.UniformIntegerHyperparameter( - 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True + 'min_samples_leaf', lower=1, upper=20, default_value=1, log=False ), ]) return cs @@ -59,7 +62,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=1, upper=128, default_value=10, log=False + 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) fidelity2 = dict( diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index 4c77a92e..e8b25999 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -32,7 +32,7 @@ def get_configuration_space(seed=None): 'eta', lower=2**-10, upper=1., default_value=0.3, log=True ), # learning rate CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, 
upper=15, default_value=2, log=False + 'max_depth', lower=1, upper=30, default_value=10, log=False ), CS.UniformFloatHyperparameter( 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True @@ -72,7 +72,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=1, upper=128, default_value=10, log=False + 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) fidelity2 = dict( From 6916c9cfebfed82e32586228e1ebaea559162436 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 30 Jul 2021 01:14:53 +0200 Subject: [PATCH 42/95] Final RF space --- hpobench/benchmarks/ml/rf_benchmark.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 0cebcdf5..cca69d0b 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -25,17 +25,13 @@ def get_configuration_space(seed=None): """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) - cs.add_hyperparameters([ CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, upper=30, default_value=10, log=False + 'max_depth', lower=1, upper=50, default_value=10, log=True ), CS.UniformFloatHyperparameter( 'min_samples_split', lower=0.05, upper=0.9, default_value=0.9, log=True ), - # CS.UniformIntegerHyperparameter( - # 'min_samples_split', lower=2, upper=20, default_value=2, log=False - # ), CS.UniformFloatHyperparameter( 'max_features', lower=0.1, upper=1.0, default_value=0.5, log=False ), From 8e5912bd19c9c5b6c859d1063943a697fbfb260a Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 30 Jul 2021 01:59:44 +0200 Subject: [PATCH 43/95] Final XGB space --- hpobench/benchmarks/ml/xgboost_benchmark.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index e8b25999..221d9d59 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -32,26 +32,17 @@ def get_configuration_space(seed=None): 'eta', lower=2**-10, upper=1., default_value=0.3, log=True ), # learning rate CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, upper=30, default_value=10, log=False + 'max_depth', lower=6, upper=50, default_value=10, log=True ), CS.UniformFloatHyperparameter( 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True ), CS.UniformFloatHyperparameter( - 'colsample_bytree', lower=0.01, upper=1., default_value=1. + 'colsample_bytree', lower=0.05, upper=1., default_value=1. ), - # CS.UniformFloatHyperparameter( - # 'colsample_bylevel', lower=0.01, upper=1., default_value=1. 
- # ), CS.UniformFloatHyperparameter( 'reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True - ), - # CS.UniformFloatHyperparameter( - # 'reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True - # ), - # CS.UniformFloatHyperparameter( - # 'subsample_per_it', lower=0.1, upper=1, default_value=1, log=False - # ) + ) ]) return cs @@ -72,7 +63,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=16, upper=512, default_value=512, log=False + 'n_estimators', lower=50, upper=2000, default_value=1000, log=False ) ) fidelity2 = dict( @@ -105,6 +96,7 @@ def init_model(self, config, fidelity=None, rng=None): """ rng = rng if (rng is None and isinstance(rng, int)) else self.seed extra_args = dict( + booster="gbtree", n_estimators=fidelity['n_estimators'], objective="binary:logistic", random_state=rng, From 6968ac365483ac51985954d3092100395ef687bf Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Fri, 30 Jul 2021 02:36:29 +0200 Subject: [PATCH 44/95] Final HistGB space --- hpobench/benchmarks/ml/histgb_benchmark.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 1860ff48..b431c056 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -30,13 +30,13 @@ def get_configuration_space(seed=None): cs.add_hyperparameters([ CS.UniformIntegerHyperparameter( - 'max_depth', lower=1, upper=15, default_value=2, log=False + 'max_depth', lower=6, upper=30, default_value=6, log=True ), CS.UniformIntegerHyperparameter( - 'min_samples_leaf', lower=1, upper=64, default_value=1, log=True + 'max_leaf_node', lower=2, upper=64, default_value=32, log=True ), CS.UniformFloatHyperparameter( - 'learning_rate', lower=2**-10, upper=1, default_value=0.3, log=True + 'eta', lower=2**-10, upper=1, default_value=0.1, log=True ), CS.UniformFloatHyperparameter( 'l2_regularization', lower=2**-10, upper=2**10, default_value=0.1, log=True @@ -61,7 +61,7 @@ def get_fidelity_space(seed=None, fidelity_choice=1): fidelity1 = dict( fixed=CS.Constant('n_estimators', value=100), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=16, upper=512, default_value=512, log=False + 'n_estimators', lower=100, upper=1000, default_value=1000, log=False ) ) fidelity2 = dict( From 79dd1f346cb0e6a16632cc6859a9a37e8dd598f7 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 2 Aug 2021 18:51:45 +0200 Subject: [PATCH 45/95] Finalizing RF, XGB, NN --- hpobench/benchmarks/ml/nn_benchmark.py | 6 ++++-- hpobench/benchmarks/ml/rf_benchmark.py | 13 +++++++++---- hpobench/benchmarks/ml/xgboost_benchmark.py | 8 +++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 0bec70e4..2c92b371 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -28,8 +28,10 @@ def get_configuration_space(seed=None): cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ - CS.UniformIntegerHyperparameter('depth', default_value=3, lower=1, upper=3), - CS.UniformIntegerHyperparameter('width', default_value=64, lower=16, upper=256), + CS.UniformIntegerHyperparameter('depth', default_value=3, lower=1, upper=3, log=False), + CS.UniformIntegerHyperparameter( + 'width', default_value=64, lower=16, upper=1024, 
log=True + ), CS.UniformIntegerHyperparameter( 'batch_size', lower=4, upper=256, default_value=32, log=True ), diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index cca69d0b..70e02bdb 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -1,6 +1,7 @@ import numpy as np import ConfigSpace as CS from typing import Union +from copy import deepcopy from sklearn.ensemble import RandomForestClassifier from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark @@ -29,11 +30,12 @@ def get_configuration_space(seed=None): CS.UniformIntegerHyperparameter( 'max_depth', lower=1, upper=50, default_value=10, log=True ), - CS.UniformFloatHyperparameter( - 'min_samples_split', lower=0.05, upper=0.9, default_value=0.9, log=True + CS.UniformIntegerHyperparameter( + 'min_samples_split', lower=2, upper=128, default_value=32, log=True ), + # the use of a float max_features is different than the sklearn usage CS.UniformFloatHyperparameter( - 'max_features', lower=0.1, upper=1.0, default_value=0.5, log=False + 'max_features', lower=0, upper=1.0, default_value=0.5, log=False ), CS.UniformIntegerHyperparameter( 'min_samples_leaf', lower=1, upper=20, default_value=1, log=False @@ -90,8 +92,11 @@ def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng + config = deepcopy(config.get_dictionary()) + n_features = self.train_X.shape[1] + config["max_features"] = int(np.rint(np.power(n_features, config["max_features"]))) model = RandomForestClassifier( - **config.get_dictionary(), + **config, n_estimators=fidelity['n_estimators'], # a fidelity being used during initialization bootstrap=True, random_state=rng diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index 221d9d59..0fe3f07c 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -32,13 +32,10 @@ def get_configuration_space(seed=None): 'eta', lower=2**-10, upper=1., default_value=0.3, log=True ), # learning rate CS.UniformIntegerHyperparameter( - 'max_depth', lower=6, upper=50, default_value=10, log=True + 'max_depth', lower=1, upper=50, default_value=10, log=True ), CS.UniformFloatHyperparameter( - 'min_child_weight', lower=1., upper=2**7., default_value=1., log=True - ), - CS.UniformFloatHyperparameter( - 'colsample_bytree', lower=0.05, upper=1., default_value=1. 
+ 'colsample_bytree', lower=0.1, upper=1., default_value=1., log=False ), CS.UniformFloatHyperparameter( 'reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True @@ -105,6 +102,7 @@ def init_model(self, config, fidelity=None, rng=None): if self.n_classes > 2: extra_args["objective"] = "multi:softmax" extra_args.update({"num_class": self.n_classes}) + model = xgb.XGBClassifier( **config.get_dictionary(), **extra_args From ca1e0d4c090ae51e7b30d2308d5f6229c9b970cf Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Mon, 2 Aug 2021 23:44:00 +0200 Subject: [PATCH 46/95] TabularBenchmark edit to process only table and metadata --- hpobench/benchmarks/ml/tabular_benchmark.py | 44 ++++++++++----------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index 528f2eef..9566d130 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -1,13 +1,11 @@ import os -import glom import json -import pickle import numpy as np import pandas as pd import ConfigSpace as CS -import pickle5 as pickle -from copy import deepcopy +from ConfigSpace.read_and_write import json as json_cs from typing import Union, List, Dict + from hpobench.benchmarks.ml.ml_benchmark_template import metrics @@ -16,25 +14,18 @@ def __init__(self, path: str, model: str, task_id: int, seed: Union[int, None] = assert os.path.isdir(path), "Not a valid path: {}".format(path) self.data_path = os.path.join(path, "{}_{}_data.parquet.gzip".format(model, task_id)) assert os.path.isfile(self.data_path) - self.config_path = os.path.join(path, "{}_{}_configs.pkl".format(model, task_id)) - assert os.path.isfile(self.config_path) - self.exp_args_path = os.path.join(path, "{}_{}.json".format(model, task_id)) - assert os.path.isfile(self.exp_args_path) + self.metadata_path = os.path.join(path, "{}_{}_metadata.json".format(model, task_id)) + assert os.path.isfile(self.metadata_path) self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) self.rng = np.random.RandomState(self.seed) self.table = self._load_parquet(self.data_path) - self.exp_args = self._load_json(self.exp_args_path) - self.config_spaces = self._load_pickle(self.config_path) - + self.metadata = self._load_json(self.metadata_path) + self.exp_args = self.metadata["exp_args"] + self.config_spaces = self.metadata["config_spaces"] + self.global_minimums = self.metadata["global_min"] self.x_cs = self.get_hyperparameter_space(seed=self.seed) self.z_cs = self.get_fidelity_space(seed=self.seed) - self.global_minimums = self.exp_args["global_min"] - - def _load_pickle(self, path): - with open(path, "rb") as f: - data = pickle.load(f) - return data def _load_parquet(self, path): data = pd.read_parquet(path) @@ -45,6 +36,13 @@ def _load_json(self, path): data = json.load(f) return data + def _preprocess_configspace(self, config_space): + """ Converts floats to np.float32 """ + for hp in config_space.get_hyperparameters(): + hp.sequence = tuple(np.array(hp.sequence).astype(np.float32)) + hp.default_value = np.float32(hp.default_value) + return config_space + def _total_number_of_configurations(self, space: str="hyperparameters") -> int: """ Returns the number of unique configurations in the parameter/fidelity space """ @@ -59,18 +57,18 @@ def _seeds_used(self): def get_hyperparameter_space(self, seed=None, original=False): cs = CS.ConfigurationSpace(seed=seed) - if original: - _cs = self.config_spaces['x'] - _cs = 
self.config_spaces['x_discrete'] + load_name = "x" if original else "x_discrete" + _cs = json_cs.read(self.config_spaces[load_name]) for hp in _cs.get_hyperparameters(): cs.add_hyperparameter(hp) + if not original: + cs = self._preprocess_configspace(cs) return cs def get_fidelity_space(self, seed=None, original=False): cs = CS.ConfigurationSpace(seed=seed) - if original: - _cs = self.config_spaces['z'] - _cs = self.config_spaces['z_discrete'] + load_name = "z" if original else "z_discrete" + _cs = json_cs.read(self.config_spaces[load_name]) for hp in _cs.get_hyperparameters(): cs.add_hyperparameter(hp) return cs From 0d70d366cfc0095ccd12819600cc263e95b6cf80 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 13:44:37 +0200 Subject: [PATCH 47/95] TabularBenchmark - Rearrange the benchmark. - Move data parts to a data manager. It is able to download the data from the web, if it is not present on the local machine. - Enforce the API structure --- extra_requirements/ml.json | 3 + hpobench/abstract_benchmark.py | 4 +- hpobench/benchmarks/ml/tabular_benchmark.py | 183 ++++++++++++-------- hpobench/dependencies/ml/__init__.py | 0 hpobench/util/data_manager.py | 99 +++++++++++ tests/test_data_manager.py | 13 ++ 6 files changed, 225 insertions(+), 77 deletions(-) create mode 100644 extra_requirements/ml.json create mode 100644 hpobench/dependencies/ml/__init__.py diff --git a/extra_requirements/ml.json b/extra_requirements/ml.json new file mode 100644 index 00000000..8a68761f --- /dev/null +++ b/extra_requirements/ml.json @@ -0,0 +1,3 @@ +{ + "ml_tabular_benchmarks": ["pandas>=1.0.0"] +} \ No newline at end of file diff --git a/hpobench/abstract_benchmark.py b/hpobench/abstract_benchmark.py index abbbcb22..5f141f6a 100644 --- a/hpobench/abstract_benchmark.py +++ b/hpobench/abstract_benchmark.py @@ -36,8 +36,8 @@ def __init__(self, rng: Union[int, np.random.RandomState, None] = None, **kwargs """ self.rng = rng_helper.get_rng(rng=rng) - self.configuration_space = self.get_configuration_space() - self.fidelity_space = self.get_fidelity_space() + self.configuration_space = self.get_configuration_space(self.rng.randint(0, 10000)) + self.fidelity_space = self.get_fidelity_space(self.rng.randint(0, 10000)) @abc.abstractmethod def objective_function(self, configuration: Union[ConfigSpace.Configuration, Dict], diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index 9566d130..b86eb426 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -1,40 +1,72 @@ -import os -import json -import numpy as np -import pandas as pd +from pathlib import Path +from typing import Union, List, Dict + +import ConfigSpace import ConfigSpace as CS +import numpy as np from ConfigSpace.read_and_write import json as json_cs -from typing import Union, List, Dict -from hpobench.benchmarks.ml.ml_benchmark_template import metrics +from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import metrics +from hpobench.util.data_manager import TabularDataManager + +class BaseTabularBenchmark(AbstractBenchmark): -class TabularBenchmark: - def __init__(self, path: str, model: str, task_id: int, seed: Union[int, None] = None): - assert os.path.isdir(path), "Not a valid path: {}".format(path) - self.data_path = os.path.join(path, "{}_{}_data.parquet.gzip".format(model, task_id)) - assert os.path.isfile(self.data_path) - self.metadata_path = os.path.join(path, 
"{}_{}_metadata.json".format(model, task_id)) - assert os.path.isfile(self.metadata_path) + def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, + rng: Union[int, np.random.RandomState, None] = None, **kwargs): + + super(BaseTabularBenchmark, self).__init__(rng=rng, **kwargs) + + self.task_id = task_id + self.model = model + + self.table, self.metadata = TabularDataManager(model, task_id, data_dir) - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = np.random.RandomState(self.seed) - self.table = self._load_parquet(self.data_path) - self.metadata = self._load_json(self.metadata_path) self.exp_args = self.metadata["exp_args"] self.config_spaces = self.metadata["config_spaces"] self.global_minimums = self.metadata["global_min"] - self.x_cs = self.get_hyperparameter_space(seed=self.seed) - self.z_cs = self.get_fidelity_space(seed=self.seed) - def _load_parquet(self, path): - data = pd.read_parquet(path) - return data + @AbstractBenchmark.check_parameters + def objective_function(self, + configuration: Union[ConfigSpace.Configuration, Dict], + fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + seed: Union[int, None] = None, + metric: Union[str, None] = 'acc', + **kwargs) -> Dict: - def _load_json(self, path): - with open(path, "r") as f: - data = json.load(f) - return data + result = self._objective(configuration, fidelity, seed, metric, evaluation="val") + return result + + @AbstractBenchmark.check_parameters + def objective_function_test(self, + configuration: Union[ConfigSpace.Configuration, Dict], + fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + seed: Union[int, None] = None, + metric: Union[str, None] = 'acc', + **kwargs) -> Dict: + + result = self._objective(configuration, fidelity, seed, metric, evaluation="test") + return result + + # pylint: disable=arguments-differ + def get_configuration_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + raise NotImplementedError + + # pylint: disable=arguments-differ + def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + raise NotImplementedError + + # pylint: disable=arguments-differ + def get_meta_information(self) -> Dict: + """ Returns the meta information for the benchmark """ + return {'name': 'BaseTabularBenchmark', + 'references': [], + 'task_id': self.task_id, + 'model': self.model + } def _preprocess_configspace(self, config_space): """ Converts floats to np.float32 """ @@ -47,7 +79,7 @@ def _total_number_of_configurations(self, space: str="hyperparameters") -> int: """ Returns the number of unique configurations in the parameter/fidelity space """ count = 1 - cs = self.x_cs if space == "hyperparameters" else self.z_cs + cs = self.configuration_space if space == "hyperparameters" else self.fidelity_space for hp in cs.get_hyperparameters(): count *= len(hp.sequence) return count @@ -55,29 +87,11 @@ def _total_number_of_configurations(self, space: str="hyperparameters") -> int: def _seeds_used(self): return self.table.seed.unique().tolist() - def get_hyperparameter_space(self, seed=None, original=False): - cs = CS.ConfigurationSpace(seed=seed) - load_name = "x" if original else "x_discrete" - _cs = json_cs.read(self.config_spaces[load_name]) - for hp in _cs.get_hyperparameters(): - cs.add_hyperparameter(hp) - if not original: - cs = 
self._preprocess_configspace(cs) - return cs - - def get_fidelity_space(self, seed=None, original=False): - cs = CS.ConfigurationSpace(seed=seed) - load_name = "z" if original else "z_discrete" - _cs = json_cs.read(self.config_spaces[load_name]) - for hp in _cs.get_hyperparameters(): - cs.add_hyperparameter(hp) - return cs - def sample_hyperparamer(self, n: int = 1) -> Union[CS.Configuration, List]: - return self.x_cs.sample_configuration(n) + return self.configuration_space.sample_configuration(n) def sample_fidelity(self, n: int = 1) -> Union[CS.Configuration, List]: - return self.z_cs.sample_configuration(n) + return self.fidelity_space.sample_configuration(n) def get_global_min(self, metric: str = "acc"): """ Retrieves the minimum (1 - metric) for train, validation and test splits @@ -88,13 +102,13 @@ def get_global_min(self, metric: str = "acc"): def get_max_fidelity(self) -> Dict: max_fidelity = dict() - for hp in self.z_cs.get_hyperparameters(): + for hp in self.fidelity_space.get_hyperparameters(): max_fidelity[hp.name] = np.sort(hp.sequence)[-1] return max_fidelity def get_fidelity_range(self): fidelities = [] - for hp in self.z_cs.get_hyperparameters(): + for hp in self.fidelity_space.get_hyperparameters(): if not isinstance(hp, CS.Constant) and len(hp.sequence) > 1: fidelities.append((hp.name, hp.sequence[0], hp.sequence[-1])) return fidelities @@ -119,17 +133,16 @@ def _objective( metric: Union[str, None] = "acc", evaluation: Union[str] = "" ) -> Dict: - self.x_cs.check_configuration(config) - self.z_cs.check_configuration(fidelity) - assert metric in list(metrics.keys()), \ - "metric not found among: {{{}}}".format(", ".join(list(metrics.keys()))) - score_key = "{}_scores".format(evaluation) - cost_key = "{}_scores".format(evaluation) + + metric_str = ', '.join(list(metrics.keys)) + assert metric in list(metrics.keys()), f"metric not found among: {metric_str}" + score_key = f"{evaluation}_scores" + cost_key = f"{evaluation}_scores" key_path = dict() - for name in np.sort(self.x_cs.get_hyperparameter_names()): + for name in np.sort(self.configuration_space.get_hyperparameter_names()): key_path[str(name)] = config[str(name)] - for name in np.sort(self.z_cs.get_hyperparameter_names()): + for name in np.sort(self.fidelity_space.get_hyperparameter_names()): key_path[str(name)] = fidelity[str(name)] if seed is not None: @@ -152,22 +165,42 @@ def _objective( result = dict(function_value=loss, cost=costs, info=info) return result - def objective_function( - self, - config: CS.Configuration, - fidelity: CS.Configuration, - seed: Union[int, None] = None, - metric: Union[str, None] = "acc" - ) -> Dict: - result = self._objective(config, fidelity, seed, metric, evaluation="val") - return result - def objective_function_test( - self, - config: CS.Configuration, - fidelity: CS.Configuration, - seed: Union[int, None] = None, - metric: Union[str, None] = "acc" - ) -> Dict: - result = self._objective(config, fidelity, seed, metric, evaluation="test") - return result +class TabularBenchmark(BaseTabularBenchmark): + def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, + rng: Union[int, np.random.RandomState, None] = None, **kwargs): + super(TabularBenchmark, self).__init__(model, task_id, data_dir, rng, **kwargs) + + # pylint: disable=arguments-differ + def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + cs = json_cs.read(self.config_spaces['x_discrete']) + cs = self._preprocess_configspace(cs) + cs.seed(seed) + return cs 
+ + # pylint: disable=arguments-differ + def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + cs = json_cs.read(self.config_spaces['z_discrete']) + cs.seed(seed=seed) + return cs + + +class OriginalTabularBenchmark(BaseTabularBenchmark): + def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, + rng: Union[int, np.random.RandomState, None] = None, **kwargs): + super(OriginalTabularBenchmark, self).__init__(model, task_id, data_dir, rng, **kwargs) + + # pylint: disable=arguments-differ + def get_configuration_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + cs = json_cs.read(self.config_spaces['x']) + cs.seed(seed) + return cs + + # pylint: disable=arguments-differ + def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + cs = json_cs.read(self.config_spaces['z']) + cs.seed(seed=seed) + return cs + + +__all__ = [TabularBenchmark, OriginalTabularBenchmark] diff --git a/hpobench/dependencies/ml/__init__.py b/hpobench/dependencies/ml/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 6e401215..371cbd3c 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -32,6 +32,12 @@ except ImportError: print("oslo_concurrency not installed, can't download datasets for nasbench201 (not needed for containers)") +try: + import pandas as pd +except ImportError: + print("pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)") + + import hpobench @@ -66,6 +72,50 @@ def create_save_directory(self, save_dir: Path): self.logger.debug(f'Create directory {save_dir}') save_dir.mkdir(parents=True, exist_ok=True) + @lockutils.synchronized('not_thread_process_safe', external=True, + lock_path=f'{hpobench.config_file.cache_dir}/lock_download_file', delay=0.5) + def _download_file_with_progressbar(self, data_url: str, data_file: Path): + data_file = Path(data_file) + + if data_file.exists(): + self.logger.info('Data File already exists. 
Skip downloading.') + return + + self.logger.info(f"Download the file from {data_url} to {data_file}") + data_file.parent.mkdir(parents=True, exist_ok=True) + + from tqdm import tqdm + r = requests.get(data_url, stream=True) + with open(data_file, 'wb') as f: + total_length = int(r.headers.get('content-length')) + for chunk in tqdm(r.iter_content(chunk_size=1024), + unit_divisor=1024, unit='kB', total=int(total_length / 1024) + 1): + if chunk: + _ = f.write(chunk) + f.flush() + self.logger.info("Finished downloading") + + @lockutils.synchronized('not_thread_process_safe', external=True, + lock_path=f'{hpobench.config_file.cache_dir}/lock_unzip_file', delay=0.5) + def _untar_data(self, compressed_file: Path, save_dir: Union[Path, None] = None): + self.logger.debug('Extract the compressed data') + with tarfile.open(compressed_file, 'r') as fh: + if save_dir is None: + save_dir = compressed_file.parent + fh.extractall(save_dir) + self.logger.debug(f'Successfully extracted the data to {save_dir}') + + @lockutils.synchronized('not_thread_process_safe', external=True, + lock_path=f'{hpobench.config_file.cache_dir}/lock_unzip_file', delay=0.5) + def _unzip_data(self, compressed_file: Path, save_dir: Union[Path, None] = None): + self.logger.debug('Extract the compressed data') + with ZipFile(compressed_file, 'r') as fh: + if save_dir is None: + save_dir = compressed_file.parent + fh.extractall(save_dir) + self.logger.debug(f'Successfully extracted the data to {save_dir}') + + class HoldoutDataManager(DataManager): """ Base Class for loading and managing the Holdout data sets. @@ -874,3 +924,52 @@ def _load(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndar X_tst, y_tst = data[n_trn + n_val:, 1:], data[n_trn + n_val:, 0] return X_trn, y_trn, X_val, y_val, X_tst, y_tst + + +class TabularDataManager(DataManager): + def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None): + super(TabularDataManager, self).__init__() + + assert model in ['lr', 'svm'] + + self.model = model + self.task_id = str(task_id) + + url_svm = 'https://figshare.com/s/5a0929ad9b2ccd8dda58' + url_lr = 'https://ndownloader.figshare.com/files/29027112?private_link=d644493a93dbab4b4ee1' + + self.url_to_use = url_svm if model == 'svm' else url_lr + + if data_dir is None: + data_dir = hpobench.config_file.data_dir / "TabularData" + + self._save_dir = Path(data_dir) + self.create_save_directory(self._save_dir) + + self.parquet_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_data.parquet.gzip' + self.metadata_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_metadata.json' + + def load(self): + # Can we directly load the files? + if self.parquet_file.exists() and self.metadata_file.exists(): + table = self._load_parquet(self.parquet_file) + metadata = self._load_json(self.metadata_file) + return table, metadata + + # We have to download the entire zip file and etract then extract the parquet file. 
+ self._download_file_with_progressbar(self.url_to_use, self._save_dir / f'{self.model}.zip') + self._unzip_data(self._save_dir / f'{self.model}.zip', self._save_dir) + table = self._load_parquet(self.parquet_file) + metadata = self._load_json(self.metadata_file) + return table, metadata + + @staticmethod + def _load_parquet(path): + data = pd.read_parquet(path) + return data + + @staticmethod + def _load_json(path): + with open(path, "r") as f: + data = json.load(f) + return data diff --git a/tests/test_data_manager.py b/tests/test_data_manager.py index 3ea3ecc4..fd57b627 100644 --- a/tests/test_data_manager.py +++ b/tests/test_data_manager.py @@ -99,3 +99,16 @@ def test_boston_data(): assert 0 < len(x_test) == len(y_test) assert 0 < len(x_valid) == len(y_valid) assert len(y_valid) < len(x_train) == len(y_train) + + +def test_tabular_datamanager(): + from hpobench.util.data_manager import TabularDataManager + dm = TabularDataManager(model='lr', + task_id='3') + + table, meta_data = dm.load() + + assert (hpobench.config_file.data_dir / "TabularData" / str(3) / f'lr_3_data.parquet.gzip').exists() + assert (hpobench.config_file.data_dir / "TabularData" / str(3) / f'lr_3_metadata.json').exists() + + table_2, meta_data_2 = dm.load() From 12ebce825d44aa00fe1d0b50d572abc7fd4fc6c8 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 13:47:12 +0200 Subject: [PATCH 48/95] Pycodestyle --- hpobench/benchmarks/ml/tabular_benchmark.py | 21 ++++++++++----------- hpobench/util/data_manager.py | 1 - 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index b86eb426..dd07ec02 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -1,7 +1,6 @@ from pathlib import Path from typing import Union, List, Dict -import ConfigSpace import ConfigSpace as CS import numpy as np from ConfigSpace.read_and_write import json as json_cs @@ -29,8 +28,8 @@ def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = @AbstractBenchmark.check_parameters def objective_function(self, - configuration: Union[ConfigSpace.Configuration, Dict], - fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, rng: Union[np.random.RandomState, int, None] = None, seed: Union[int, None] = None, metric: Union[str, None] = 'acc', @@ -41,8 +40,8 @@ def objective_function(self, @AbstractBenchmark.check_parameters def objective_function_test(self, - configuration: Union[ConfigSpace.Configuration, Dict], - fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, rng: Union[np.random.RandomState, int, None] = None, seed: Union[int, None] = None, metric: Union[str, None] = 'acc', @@ -52,11 +51,11 @@ def objective_function_test(self, return result # pylint: disable=arguments-differ - def get_configuration_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: raise NotImplementedError # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: raise NotImplementedError # pylint: 
disable=arguments-differ @@ -75,7 +74,7 @@ def _preprocess_configspace(self, config_space): hp.default_value = np.float32(hp.default_value) return config_space - def _total_number_of_configurations(self, space: str="hyperparameters") -> int: + def _total_number_of_configurations(self, space: str = "hyperparameters") -> int: """ Returns the number of unique configurations in the parameter/fidelity space """ count = 1 @@ -179,7 +178,7 @@ def get_configuration_space(self, seed: Union[int, None] = None) -> CS.Configura return cs # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: cs = json_cs.read(self.config_spaces['z_discrete']) cs.seed(seed=seed) return cs @@ -191,13 +190,13 @@ def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = super(OriginalTabularBenchmark, self).__init__(model, task_id, data_dir, rng, **kwargs) # pylint: disable=arguments-differ - def get_configuration_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: cs = json_cs.read(self.config_spaces['x']) cs.seed(seed) return cs # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: cs = json_cs.read(self.config_spaces['z']) cs.seed(seed=seed) return cs diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 371cbd3c..3063b2e7 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -116,7 +116,6 @@ def _unzip_data(self, compressed_file: Path, save_dir: Union[Path, None] = None) self.logger.debug(f'Successfully extracted the data to {save_dir}') - class HoldoutDataManager(DataManager): """ Base Class for loading and managing the Holdout data sets. 
From 873781e878d349497c6db34d309bf6c3bb1b817f Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 13:48:13 +0200 Subject: [PATCH 49/95] Flake8 --- hpobench/util/data_manager.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 3063b2e7..9e6f8fb9 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -13,19 +13,19 @@ import abc import gzip +import json import logging import pickle import tarfile -import requests - from io import BytesIO from pathlib import Path +from time import time from typing import Tuple, Dict, Any, Union from urllib.request import urlretrieve, urlopen from zipfile import ZipFile -from time import time import numpy as np +import requests try: from oslo_concurrency import lockutils From 532a905ba496f773cd5057969637f3f14a4563bf Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 17:39:07 +0200 Subject: [PATCH 50/95] Adapt ML Benchmark Template to fit with current API --- hpobench/abstract_benchmark.py | 7 +- hpobench/dependencies/ml/data_manager.py | 163 ++++++++++++ .../ml/ml_benchmark_template.py | 245 +++++------------- 3 files changed, 231 insertions(+), 184 deletions(-) create mode 100644 hpobench/dependencies/ml/data_manager.py rename hpobench/{benchmarks => dependencies}/ml/ml_benchmark_template.py (52%) diff --git a/hpobench/abstract_benchmark.py b/hpobench/abstract_benchmark.py index 5f141f6a..c9db4216 100644 --- a/hpobench/abstract_benchmark.py +++ b/hpobench/abstract_benchmark.py @@ -226,17 +226,12 @@ def get_configuration_space(seed: Union[int, None] = None) -> ConfigSpace.Config @staticmethod @abc.abstractmethod - def get_fidelity_space( - seed: Union[int, None] = None, fidelity_choice: Union[int, None] = None - ) -> ConfigSpace.ConfigurationSpace: + def get_fidelity_space(seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: """ Defines the available fidelity parameters as a "fidelity space" for each benchmark. Parameters ---------- seed: int, None Seed for the fidelity space. 
- fidelity_choice: int, None - integer value to choose the type of fidelity space - Returns ------- ConfigSpace.ConfigurationSpace diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py new file mode 100644 index 00000000..9cc7f5f7 --- /dev/null +++ b/hpobench/dependencies/ml/data_manager.py @@ -0,0 +1,163 @@ +import openml +import numpy as np +import pandas as pd +from typing import Union +from pathlib import Path + +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.utils import check_random_state +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split + + +from hpobench.util.data_manager import DataManager + + +from hpobench import config_file + + +class OpenMLDataManager(DataManager): + + def __init__(self, task_id: int, + valid_size: Union[float, None] = 0.33, + data_path: Union[str, Path, None] = None, + global_seed: Union[int, None] = 1): + + self.task_id = task_id + self.global_seed = global_seed + + self.valid_size = valid_size + + self.train_X = None + self.valid_X = None + self.test_X = None + self.train_y = None + self.valid_y = None + self.test_y = None + self.train_idx = None + self.test_idx = None + self.task = None + self.dataset = None + self.preprocessor = None + self.lower_bound_train_size = None + self.n_classes = None + + if data_path is None: + data_path = config_file.data_dir / "OpenML" + + self.data_path = data_path + super(OpenMLDataManager, self).__init__() + + def load(self, valid_size=None, verbose=False): + """Fetches data from OpenML and initializes the train-validation-test data splits + + The validation set is fixed till this function is called again or explicitly altered + """ + # fetches task + self.task = openml.tasks.get_task(self.task_id, download_data=False) + self.n_classes = len(self.task.class_labels) + + # fetches dataset + self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) + if verbose: + self.logger.debug(self.task) + self.logger.debug(self.dataset) + + # check if the path to data splits is valid + if self.data_path is not None and self.data_path.is_dir(): + data_path = self.data_path / str(self.task_id) + required_file_list = [ + ("train", "x"), ("train", "y"), + ("valid", "x"), ("valid", "y"), + ("test", "x"), ("test", "y") + ] + for files in required_file_list: + data_str = "{}_{}.parquet.gzip".format(*files) + if (data_path / data_str).exists(): + raise FileNotFoundError("{} not found!".format(data_str.format(*files))) + # ignore the remaining data loaders and preprocessors as valid data splits available + return + + # loads full data + X, y, categorical_ind, feature_names = self.dataset.get_data( + target=self.task.target_name, dataset_format="dataframe" + ) + categorical_ind = np.array(categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) + + # splitting dataset into train and test (10% test) + # train-test split is fixed for a task and its associated dataset (from OpenML) + self.train_idx, self.test_idx = self.task.get_train_test_split_indices() + train_x = X.iloc[self.train_idx] + train_y = y.iloc[self.train_idx] + self.test_X = X.iloc[self.test_idx] + self.test_y = y.iloc[self.test_idx] + + # splitting training into training and validation + # validation set is fixed as per the global seed independent of the benchmark seed + 
valid_size = self.valid_size if valid_size is None else valid_size + self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( + train_x, train_y, test_size=valid_size, shuffle=True, stratify=train_y, + random_state=check_random_state(self.global_seed) + ) + + # preprocessor to handle missing values, categorical columns encodings, + # and scaling numeric columns + self.preprocessor = make_pipeline( + ColumnTransformer([ + ( + "cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), + OneHotEncoder(sparse=False, handle_unknown="ignore")), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), + StandardScaler()), + cont_idx.tolist(), + ) + ]) + ) + if verbose: + self.logger.debug("Shape of data pre-preprocessing: {}".format(self.train_X.shape)) + + # preprocessor fit only on the training set + self.train_X = self.preprocessor.fit_transform(self.train_X) + # applying preprocessor built on the training set, across validation and test splits + self.valid_X = self.preprocessor.transform(self.valid_X) + self.test_X = self.preprocessor.transform(self.test_X) + # converting boolean labels to strings + self.train_y = self._convert_labels(self.train_y) + self.valid_y = self._convert_labels(self.valid_y) + self.test_y = self._convert_labels(self.test_y) + + # Similar to (https://arxiv.org/pdf/1605.07079.pdf) + # use 10 times the number of classes as lower bound for the dataset fraction + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + + if verbose: + self.logger.debug("Shape of data post-preprocessing: {}".format(self.train_X.shape), "\n") + self.logger.debug("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) + self.logger.debug("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) + self.logger.debug("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) + self.logger.debug("\nData loading complete!\n") + return + + @staticmethod + def _convert_labels(labels): + """Converts boolean labels (if exists) to strings + """ + label_types = list(map(lambda x: isinstance(x, bool), labels)) + if np.all(label_types): + _labels = list(map(lambda x: str(x), labels)) + if isinstance(labels, pd.Series): + labels = pd.Series(_labels, index=labels.index) + elif isinstance(labels, np.array): + labels = np.array(labels) + return labels diff --git a/hpobench/benchmarks/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py similarity index 52% rename from hpobench/benchmarks/ml/ml_benchmark_template.py rename to hpobench/dependencies/ml/ml_benchmark_template.py index 83b39957..54029736 100644 --- a/hpobench/benchmarks/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -5,19 +5,13 @@ import pandas as pd import ConfigSpace as CS from typing import Union, Dict +from pathlib import Path -from sklearn.impute import SimpleImputer -from sklearn.pipeline import make_pipeline -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split from sklearn.metrics import make_scorer, accuracy_score, balanced_accuracy_score, \ precision_score, f1_score from hpobench.abstract_benchmark import AbstractBenchmark - +from 
hpobench.dependencies.ml.data_manager import OpenMLDataManager metrics = dict( acc=accuracy_score, @@ -25,6 +19,7 @@ f1=f1_score, precision=precision_score, ) + metrics_kwargs = dict( acc=dict(), bal_acc=dict(), @@ -39,16 +34,19 @@ class MLBenchmark(AbstractBenchmark): def __init__( self, task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None, + data_path: Union[str, Path, None] = None, global_seed: int = 1 ): - self.seed = seed if seed is not None else np.random.randint(1, 10 ** 6) - self.rng = check_random_state(self.seed) + super(MLBenchmark, self).__init__(rng=rng) + + if isinstance(rng, int): + self.seed = rng + else: + self.seed = self.rng.randint(1, 10**6) + self.global_seed = global_seed # used for fixed training-validation splits - super(MLBenchmark, self).__init__(rng=seed) self.task_id = task_id self.valid_size = valid_size @@ -57,25 +55,27 @@ def __init__( self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) self.data_path = data_path + dm = OpenMLDataManager(task_id, valid_size, data_path, global_seed) + dm.load() + # Data variables - self.train_X = None - self.valid_X = None - self.test_X = None - self.train_y = None - self.valid_y = None - self.test_y = None - self.train_idx = None - self.test_idx = None - self.task = None - self.dataset = None - self.preprocessor = None - self.lower_bound_train_size = None - self.load_data_from_openml() + self.train_X = dm.train_X + self.valid_X = dm.valid_X + self.test_X = dm.test_X + self.train_y = dm.train_y + self.valid_y = dm.valid_y + self.test_y = dm.test_y + self.train_idx = dm.train_idx + self.test_idx = dm.test_idx + self.task = dm.task + self.dataset = dm.dataset + self.preprocessor = dm.preprocessor + self.lower_bound_train_size = dm.lower_bound_train_size + self.n_classes = dm.n_classes # Observation and fidelity spaces - self.fidelity_choice = fidelity_choice - self.z_cs = self.get_fidelity_space(self.seed, self.fidelity_choice) - self.x_cs = self.get_configuration_space(self.seed) + self.fidelity_space = self.get_fidelity_space(self.seed) + self.configuration_space = self.get_configuration_space(self.seed) @staticmethod def get_configuration_space(seed=None): @@ -84,7 +84,7 @@ def get_configuration_space(seed=None): raise NotImplementedError() @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=None): + def get_fidelity_space(seed=None): """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -98,130 +98,35 @@ def get_fidelity_space(seed=None, fidelity_choice=None): """ raise NotImplementedError() + def get_meta_information(self): + """ Returns the meta information for the benchmark """ + return { + 'name': 'Support Vector Machine', + 'shape of train data': self.train_X.shape, + 'shape of test data': self.test_X.shape, + 'shape of valid data': self.valid_X.shape, + 'initial random seed': self.seed, + 'task_id': self.task_id + } + + def init_model(self, config, fidelity=None, rng=None): + """ Function that returns the model initialized based on the configuration and fidelity + """ + raise NotImplementedError() + def get_config(self, size=None): """Samples configuration(s) from the (hyper) parameter space """ if size is None: # return only one config - return self.x_cs.sample_configuration() - return [self.x_cs.sample_configuration() for i in range(size)] + return 
self.configuration_space.sample_configuration() + return [self.configuration_space.sample_configuration() for i in range(size)] def get_fidelity(self, size=None): """Samples candidate fidelities from the fidelity space """ if size is None: # return only one config - return self.z_cs.sample_configuration() - return [self.z_cs.sample_configuration() for i in range(size)] - - def _convert_labels(self, labels): - """Converts boolean labels (if exists) to strings - """ - label_types = list(map(lambda x: isinstance(x, bool), labels)) - if np.all(label_types): - _labels = list(map(lambda x: str(x), labels)) - if isinstance(labels, pd.Series): - labels = pd.Series(_labels, index=labels.index) - elif isinstance(labels, np.array): - labels = np.array(labels) - return labels - - def load_data_from_openml(self, valid_size=None, verbose=False): - """Fetches data from OpenML and initializes the train-validation-test data splits - - The validation set is fixed till this function is called again or explicitly altered - """ - # fetches task - self.task = openml.tasks.get_task(self.task_id, download_data=False) - self.n_classes = len(self.task.class_labels) - # fetches dataset - self.dataset = openml.datasets.get_dataset(self.task.dataset_id, download_data=False) - if verbose: - print(self.task, '\n') - print(self.dataset, '\n') - - # check if the path to data splits is valid - if self.data_path is not None and os.path.isdir(self.data_path): - data_path = os.path.join(self.data_path, str(self.task_id)) - data_str = os.path.join(data_path, "{}_{}.parquet.gzip") - required_file_list = [ - ("train", "x"), ("train", "y"), - ("valid", "x"), ("valid", "y"), - ("test", "x"), ("test", "y") - ] - for files in required_file_list: - if not os.path.isfile(data_str.format("train", "x")): - raise FileNotFoundError("{} not found!".format(data_str.format(*files))) - # ignore the remaining data loaders and preprocessors as valid data splits available - return - - # loads full data - X, y, categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - # splitting dataset into train and test (10% test) - # train-test split is fixed for a task and its associated dataset (from OpenML) - self.train_idx, self.test_idx = self.task.get_train_test_split_indices() - train_x = X.iloc[self.train_idx] - train_y = y.iloc[self.train_idx] - self.test_X = X.iloc[self.test_idx] - self.test_y = y.iloc[self.test_idx] - - # splitting training into training and validation - # validation set is fixed as per the global seed independent of the benchmark seed - valid_size = self.valid_size if valid_size is None else valid_size - self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split( - train_x, train_y, test_size=valid_size, shuffle=True, stratify=train_y, - random_state=check_random_state(self.global_seed) - ) - - # preprocessor to handle missing values, categorical columns encodings, - # and scaling numeric columns - self.preprocessor = make_pipeline( - ColumnTransformer([ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), - OneHotEncoder(sparse=False, handle_unknown="ignore")), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), - StandardScaler()), - cont_idx.tolist(), - ) - ]) - ) - if verbose: - print("Shape of data pre-preprocessing: {}".format(self.train_X.shape)) - - # 
preprocessor fit only on the training set - self.train_X = self.preprocessor.fit_transform(self.train_X) - # applying preprocessor built on the training set, across validation and test splits - self.valid_X = self.preprocessor.transform(self.valid_X) - self.test_X = self.preprocessor.transform(self.test_X) - # converting boolean labels to strings - self.train_y = self._convert_labels(self.train_y) - self.valid_y = self._convert_labels(self.valid_y) - self.test_y = self._convert_labels(self.test_y) - - # Similar to (https://arxiv.org/pdf/1605.07079.pdf) - # use 10 times the number of classes as lower bound for the dataset fraction - self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] - self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) - - if verbose: - print("Shape of data post-preprocessing: {}".format(self.train_X.shape), "\n") - - if verbose: - print("\nTraining data (X, y): ({}, {})".format(self.train_X.shape, self.train_y.shape)) - print("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) - print("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) - print("\nData loading complete!\n") - return + return self.fidelity_space.sample_configuration() + return [self.fidelity_space.sample_configuration() for i in range(size)] def shuffle_data_idx(self, train_idx=None, rng=None): rng = self.rng if rng is None else rng @@ -229,11 +134,6 @@ def shuffle_data_idx(self, train_idx=None, rng=None): rng.shuffle(train_idx) return train_idx - def init_model(self, config, fidelity=None, rng=None): - """ Function that returns the model initialized based on the configuration and fidelity - """ - raise NotImplementedError() - def _train_objective(self, config, fidelity, shuffle, rng, evaluation="valid"): # initializing model model = self.init_model(config, fidelity, rng) @@ -282,14 +182,14 @@ def _train_objective(self, config, fidelity, shuffle, rng, evaluation="valid"): train_loss = 1 - scores["acc"] return model, model_fit_time, train_loss, scores, score_cost - def objective_function( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set """ model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( @@ -333,14 +233,14 @@ def objective_function( 'info': info } - def objective_function_test( - self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs - ) -> Dict: + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function_test(self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set """ model, model_fit_time, train_loss, train_scores, 
train_score_cost = self._train_objective( @@ -375,14 +275,3 @@ def objective_function_test( 'cost': model_fit_time + info['test_costs']['acc'], 'info': info } - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - return { - 'name': 'Support Vector Machine', - 'shape of train data': self.train_X.shape, - 'shape of test data': self.test_X.shape, - 'shape of valid data': self.valid_X.shape, - 'initial random seed': self.seed, - 'task_id': self.task_id - } From 9dbd61c6255f2ac1ecde3e9f5250d78d9698bba5 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 17:55:58 +0200 Subject: [PATCH 51/95] Corret Datamanager. But how to download the task data to disk? --- hpobench/dependencies/ml/data_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py index 9cc7f5f7..d65ee62c 100644 --- a/hpobench/dependencies/ml/data_manager.py +++ b/hpobench/dependencies/ml/data_manager.py @@ -76,7 +76,7 @@ def load(self, valid_size=None, verbose=False): ] for files in required_file_list: data_str = "{}_{}.parquet.gzip".format(*files) - if (data_path / data_str).exists(): + if not (data_path / data_str).exists(): raise FileNotFoundError("{} not found!".format(data_str.format(*files))) # ignore the remaining data loaders and preprocessors as valid data splits available return From 0304146fa91d818178cb3b98bea9b445edfe0d2f Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 11 Aug 2021 17:57:20 +0200 Subject: [PATCH 52/95] Finalize HistGB Benchmarks - split them into multiple benchmarks according to their fidelity spaces. --- hpobench/benchmarks/ml/__init__.py | 7 -- hpobench/benchmarks/ml/histgb_benchmark.py | 123 ++++++++++++++------- 2 files changed, 82 insertions(+), 48 deletions(-) diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index 37d5cd33..e69de29b 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -1,7 +0,0 @@ -from .tabular_benchmark import TabularBenchmark -from .svm_benchmark import SVMBenchmark -from .rf_benchmark import RandomForestBenchmark -from .xgboost_benchmark import XGBoostBenchmark -from .histgb_benchmark import HistGBBenchmark -from .lr_benchmark import LRBenchmark -from .nn_benchmark import NNBenchmark \ No newline at end of file diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index b431c056..332dadc4 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -1,31 +1,25 @@ -import numpy as np import ConfigSpace as CS -from copy import deepcopy +import numpy as np from typing import Union # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class HistGBBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(HistGBBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) - pass + def __init__(self, + task_id: 
Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + super(HistGBBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod - def get_configuration_space(seed=None): - """Parameter space to be optimized --- contains the hyperparameters - """ + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """Parameter space to be optimized --- contains the hyperparameters""" cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ @@ -45,21 +39,24 @@ def get_configuration_space(seed=None): return cs @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str): """Fidelity space available --- specifies the fidelity dimensions - If fidelity_choice is 0 + If SearchSpace is 0 Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 + If SearchSpace is 1 Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 + If SearchSpace is 2 Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 + If SearchSpace is >2 Fidelity space is multi-multi fidelity, all possible fidelities """ - z_cs = CS.ConfigurationSpace(seed=seed) + assert ntrees_choice in ['fixed', 'variable'] + assert subsample_choice in ['fixed', 'variable'] + fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=100), + # TODO: this value was 100 in the original code. Please check if 100 or 1000. + fixed=CS.Constant('n_estimators', value=1000), variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=100, upper=1000, default_value=1000, log=False ) @@ -70,24 +67,24 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - ntrees = fidelity1["fixed"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 1: - # gray-box setting (multi-fidelity) - ntrees - ntrees = fidelity1["variable"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 2: - # gray-box setting (multi-fidelity) - data subsample - ntrees = fidelity1["fixed"] - subsample = fidelity2["variable"] - else: - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - ntrees = fidelity1["variable"] - subsample = fidelity2["variable"] - z_cs.add_hyperparameters([ntrees, subsample]) - return z_cs + ntrees = fidelity1[ntrees_choice] + subsample = fidelity2[subsample_choice] + return ntrees, subsample + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """Fidelity space available --- specifies the fidelity dimensions + + If fidelity_choice is 0 + Fidelity space is the maximal fidelity, akin to a black-box function + If fidelity_choice is 1 + Fidelity space is a single fidelity, in this case the number of trees (n_estimators) + If fidelity_choice is 2 + Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) + If fidelity_choice is >2 + Fidelity space is multi-multi fidelity, all possible fidelities + """ + raise NotImplementedError() def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity @@ -100,3 +97,47 @@ def init_model(self, config, fidelity=None, rng=None): random_state=rng ) return model 
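For orientation, a minimal usage sketch of the helper above (class name and import path as they stand at this point in the patch series; the seed value is an arbitrary placeholder, not taken from the patch). The four subclasses added just below wrap exactly these calls and replace the former integer-valued fidelity_choice:

    import ConfigSpace as CS
    from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark

    # equivalent of the old fidelity_choice > 2: both n_estimators and subsample vary
    fidelity_space = CS.ConfigurationSpace(seed=1)
    fidelity_space.add_hyperparameters(
        HistGBBenchmark._get_fidelity_choices(ntrees_choice='variable', subsample_choice='variable')
    )
    print(fidelity_space)
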
+ + +class HistGBSearchSpace0Benchmark(HistGBBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # black-box setting (full fidelity) + HistGBBenchmark._get_fidelity_choices(ntrees_choice='fixed', subsample_choice='fixed') + ) + return fidelity_space + + +class HistGBSearchSpace1Benchmark(HistGBBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - ntrees + HistGBBenchmark._get_fidelity_choices(ntrees_choice='variable', subsample_choice='fixed') + ) + return fidelity_space + + +class HistGBSearchSpace2Benchmark(HistGBBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - subsample + HistGBBenchmark._get_fidelity_choices(ntrees_choice='fixed', subsample_choice='variable') + ) + return fidelity_space + + +class HistGBSearchSpace3Benchmark(HistGBBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + HistGBBenchmark._get_fidelity_choices(ntrees_choice='variable', subsample_choice='variable') + ) + return fidelity_space + + +__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, + HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark] From 3e95d191f379baecb759e3dff81a62d8838a5359 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 18:11:35 +0200 Subject: [PATCH 53/95] Write OpenML Datamanager --- hpobench/dependencies/ml/data_manager.py | 73 ++++++++++++++----- .../dependencies/ml/ml_benchmark_template.py | 7 +- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py index d65ee62c..244cd0cf 100644 --- a/hpobench/dependencies/ml/data_manager.py +++ b/hpobench/dependencies/ml/data_manager.py @@ -11,7 +11,7 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split - +from oslo_concurrency import lockutils from hpobench.util.data_manager import DataManager @@ -48,9 +48,13 @@ def __init__(self, task_id: int, if data_path is None: data_path = config_file.data_dir / "OpenML" - self.data_path = data_path + self.data_path = Path(data_path) + openml.config.set_cache_directory(str(self.data_path)) + super(OpenMLDataManager, self).__init__() + @lockutils.synchronized('not_thread_process_safe', external=True, + lock_path=f'{config_file.cache_dir}/openml_dm_lock', delay=0.2) def load(self, valid_size=None, verbose=False): """Fetches data from OpenML and initializes the train-validation-test data splits @@ -66,25 +70,42 @@ def load(self, valid_size=None, verbose=False): self.logger.debug(self.task) self.logger.debug(self.dataset) - # check if the path to data splits is valid - if self.data_path is not None and self.data_path.is_dir(): - data_path = self.data_path / str(self.task_id) - required_file_list = [ - ("train", "x"), ("train", "y"), - ("valid", "x"), ("valid", "y"), - ("test", "x"), ("test", "y") - ] - for 
files in required_file_list: - data_str = "{}_{}.parquet.gzip".format(*files) - if not (data_path / data_str).exists(): - raise FileNotFoundError("{} not found!".format(data_str.format(*files))) - # ignore the remaining data loaders and preprocessors as valid data splits available + data_set_path = self.data_path / "org/openml/www/datasets" / str(self.task.dataset_id) + successfully_loaded = self.try_to_load_data(data_set_path) + if successfully_loaded: + self.logger.info(f'Successfully loaded the preprocessed splits from ' + f'{data_set_path}') return + # If the data is not available, download it. + self.__download_data(verbose=verbose, valid_size=valid_size) + + # Save the preprocessed splits to file for later usage. + self.generate_openml_splits(data_set_path) + + return + + def try_to_load_data(self, data_path: Path) -> bool: + path_str = "{}_{}.parquet.gzip" + try: + self.train_X = pd.read_parquet(data_path / path_str.format("train", "x")).to_numpy() + self.train_y = pd.read_parquet(data_path / path_str.format("train", "y")).squeeze(axis=1) + self.valid_X = pd.read_parquet(data_path / path_str.format("valid", "x")).to_numpy() + self.valid_y = pd.read_parquet(data_path / path_str.format("valid", "y")).squeeze(axis=1) + self.test_X = pd.read_parquet(data_path / path_str.format("test", "x")).to_numpy() + self.test_y = pd.read_parquet(data_path / path_str.format("test", "y")).squeeze(axis=1) + except FileNotFoundError: + return False + return True + + def __download_data(self, valid_size: Union[int, float, None], verbose: bool): + self.logger.info(f'Start to download the OpenML dataset') + # loads full data - X, y, categorical_ind, feature_names = self.dataset.get_data( - target=self.task.target_name, dataset_format="dataframe" - ) + X, y, categorical_ind, feature_names = self.dataset.get_data(target=self.task.target_name, + dataset_format="dataframe") + assert Path(self.dataset.data_file).exists(), f'The datafile {self.dataset.data_file} does not exists.' 
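+        # from here on, the train/valid/test splits and the preprocessing pipeline are rebuilt as before,
+        # so that generate_openml_splits() can persist them as parquet files for later runs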
+ categorical_ind = np.array(categorical_ind) (cat_idx,) = np.where(categorical_ind) (cont_idx,) = np.where(~categorical_ind) @@ -147,7 +168,21 @@ def load(self, valid_size=None, verbose=False): self.logger.debug("Validation data (X, y): ({}, {})".format(self.valid_X.shape, self.valid_y.shape)) self.logger.debug("Test data (X, y): ({}, {})".format(self.test_X.shape, self.test_y.shape)) self.logger.debug("\nData loading complete!\n") - return + + def generate_openml_splits(self, data_path): + """ Store the created splits to file for later use… """ + self.logger.info(f'Save the splits to {data_path}') + + path_str = "{}_{}.parquet.gzip" + colnames = np.arange(self.train_X.shape[1]).astype(str) + label_name = str(self.task.target_name) + + pd.DataFrame(self.train_X, columns=colnames).to_parquet(data_path / path_str.format("train", "x")) + self.train_y.to_frame(label_name).to_parquet(data_path / path_str.format("train", "y")) + pd.DataFrame(self.valid_X, columns=colnames).to_parquet(data_path / path_str.format("valid", "x")) + self.valid_y.to_frame(label_name).to_parquet(data_path / path_str.format("valid", "y")) + pd.DataFrame(self.test_X, columns=colnames).to_parquet(data_path / path_str.format("test", "x")) + self.test_y.to_frame(label_name).to_parquet(data_path / path_str.format("test", "y")) @staticmethod def _convert_labels(labels): diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index 54029736..c773e830 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -53,7 +53,12 @@ def __init__( self.scorers = dict() for k, v in metrics.items(): self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) - self.data_path = data_path + + if data_path is None: + from hpobench import config_file + data_path = config_file.data_dir / "OpenML" + + self.data_path = Path(data_path) dm = OpenMLDataManager(task_id, valid_size, data_path, global_seed) dm.load() From f3fbd584249707292be1c354c565fcfea6f03bac Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 18:52:17 +0200 Subject: [PATCH 54/95] Unify interface for the other ml benchmarks. 
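With this change every ML benchmark family shares the same constructor signature (task_id, rng, valid_size, data_path) and exposes its fidelity dimensions only through the per-class get_fidelity_space(); the old fidelity_choice argument is gone. A minimal usage sketch follows (module path as of this patch, before the later move to ml_mmfb; the OpenML task id and seed below are placeholder values, not taken from the patch):

    from hpobench.benchmarks.ml.rf_benchmark import RandomForestSearchSpace3Benchmark

    # multi-multi-fidelity variant: both n_estimators and subsample are part of the fidelity space
    benchmark = RandomForestSearchSpace3Benchmark(task_id=167119, rng=1, valid_size=0.33)
    config = benchmark.get_config()      # sample from the hyperparameter space
    fidelity = benchmark.get_fidelity()  # sample from the fidelity space
    result = benchmark.objective_function(configuration=config, fidelity=fidelity, rng=1)
    print(result)
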
--- hpobench/benchmarks/ml/histgb_benchmark.py | 37 +++---- hpobench/benchmarks/ml/lr_benchmark.py | 104 +++++++++++++------ hpobench/benchmarks/ml/nn_benchmark.py | 105 ++++++++++++------- hpobench/benchmarks/ml/rf_benchmark.py | 109 +++++++++++++------- hpobench/benchmarks/ml/svm_benchmark.py | 67 +++++++----- hpobench/benchmarks/ml/xgboost_benchmark.py | 108 ++++++++++++------- 6 files changed, 337 insertions(+), 193 deletions(-) diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py index 332dadc4..929f2bbf 100644 --- a/hpobench/benchmarks/ml/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -1,10 +1,11 @@ +from typing import Union, Tuple + import ConfigSpace as CS import numpy as np -from typing import Union - +from ConfigSpace.hyperparameters import Hyperparameter +from sklearn.ensemble import HistGradientBoostingClassifier # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html from sklearn.experimental import enable_hist_gradient_boosting # noqa -from sklearn.ensemble import HistGradientBoostingClassifier from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark @@ -39,18 +40,23 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp return cs @staticmethod - def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions - If SearchSpace is 0 + If fidelity_choice is 0 Fidelity space is the maximal fidelity, akin to a black-box function - If SearchSpace is 1 + If fidelity_choice is 1 Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If SearchSpace is 2 + If fidelity_choice is 2 Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If SearchSpace is >2 + If fidelity_choice is >2 Fidelity space is multi-multi fidelity, all possible fidelities """ + raise NotImplementedError() + + @staticmethod + def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + assert ntrees_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -71,21 +77,6 @@ def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str): subsample = fidelity2[subsample_choice] return ntrees, subsample - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities - """ - raise NotImplementedError() - def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity """ diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py index de791aa6..a7e1f857 100644 --- a/hpobench/benchmarks/ml/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -1,22 +1,22 @@ -import ConfigSpace as CS -from typing import Union, List, Dict +from 
typing import Union, Tuple +import ConfigSpace as CS +import numpy as np +from ConfigSpace.hyperparameters import Hyperparameter from sklearn.linear_model import SGDClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class LRBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(LRBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) - self.cache_size = 500 +class LRBaseBenchmark(MLBenchmark): + def __init__(self, + task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + + super(LRBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + self.cache_size = 500 # TODO: Do we need this? @staticmethod def get_configuration_space(seed=None): @@ -33,8 +33,11 @@ def get_configuration_space(seed=None): ]) return cs + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + raise NotImplementedError() + @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=None): + def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: """Fidelity space available --- specifies the fidelity dimensions For SVM, only a single fidelity exists, i.e., subsample fraction. @@ -44,7 +47,10 @@ def get_fidelity_space(seed=None, fidelity_choice=None): parameterizes the fraction of data to subsample """ - z_cs = CS.ConfigurationSpace(seed=seed) + + assert iter_choice in ['fixed', 'variable'] + assert subsample_choice in ['fixed', 'variable'] + fidelity1 = dict( fixed=CS.Constant('iter', value=1000), variable=CS.UniformIntegerHyperparameter( @@ -57,24 +63,10 @@ def get_fidelity_space(seed=None, fidelity_choice=None): 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - iter = fidelity1["fixed"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 1: - # gray-box setting (multi-fidelity) - iterations - iter = fidelity1["variable"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 2: - # gray-box setting (multi-fidelity) - data subsample - iter = fidelity1["fixed"] - subsample = fidelity2["variable"] - else: - # gray-box setting (multi-multi-fidelity) - iterations + data subsample - iter = fidelity1["variable"] - subsample = fidelity2["variable"] - z_cs.add_hyperparameters([iter, subsample]) - return z_cs + + iter = fidelity1[iter_choice] + subsample = fidelity2[subsample_choice] + return iter, subsample def init_model(self, config, fidelity=None, rng=None): # initializing model @@ -89,3 +81,47 @@ def init_model(self, config, fidelity=None, rng=None): random_state=rng, ) return model + + +class LRSearchSpace0Benchmark(LRBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # black-box setting (full fidelity) + LRBaseBenchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') + ) + return fidelity_space + + +class LRSearchSpace1Benchmark(LRBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> 
CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - iterations + LRBaseBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') + ) + return fidelity_space + + +class LRSearchSpace2Benchmark(LRBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - data subsample + LRBaseBenchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='variable') + ) + return fidelity_space + + +class LRSearchSpace3Benchmark(LRBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - iterations + data subsample + LRBaseBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + ) + return fidelity_space + + +__all__ = [LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, + LRSearchSpace2Benchmark, LRSearchSpace3Benchmark] diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 2c92b371..fd6e0a51 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -1,25 +1,21 @@ -import numpy as np -import ConfigSpace as CS from copy import deepcopy from typing import Union, Tuple + +import ConfigSpace as CS +import numpy as np +from ConfigSpace.hyperparameters import Hyperparameter from sklearn.neural_network import MLPClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class NNBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(NNBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, data_path - ) - pass +class NNBaseBenchmark(MLBenchmark): + def __init__(self, + task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + super(NNBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed=None): @@ -28,7 +24,9 @@ def get_configuration_space(seed=None): cs = CS.ConfigurationSpace(seed=seed) cs.add_hyperparameters([ - CS.UniformIntegerHyperparameter('depth', default_value=3, lower=1, upper=3, log=False), + CS.UniformIntegerHyperparameter( + 'depth', default_value=3, lower=1, upper=3, log=False + ), CS.UniformIntegerHyperparameter( 'width', default_value=64, lower=16, upper=1024, log=True ), @@ -45,7 +43,7 @@ def get_configuration_space(seed=None): return cs @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -57,7 +55,11 @@ def get_fidelity_space(seed=None, fidelity_choice=1): If fidelity_choice is >2 Fidelity space is multi-multi fidelity, all possible fidelities """ - z_cs = CS.ConfigurationSpace(seed=seed) + raise NotImplementedError() + + @staticmethod 
+ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + fidelity1 = dict( fixed=CS.Constant('iter', value=100), variable=CS.UniformIntegerHyperparameter( @@ -70,24 +72,9 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - iter = fidelity1["fixed"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 1: - # gray-box setting (multi-fidelity) - epochs/iteration - iter = fidelity1["variable"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 2: - # gray-box setting (multi-fidelity) - data subsample - iter = fidelity1["fixed"] - subsample = fidelity2["variable"] - else: - # gray-box setting (multi-multi-fidelity) - epochs + data subsample - iter = fidelity1["variable"] - subsample = fidelity2["variable"] - z_cs.add_hyperparameters([iter, subsample]) - return z_cs + iter = fidelity1[iter_choice] + subsample = fidelity2[subsample_choice] + return iter, subsample def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity @@ -108,3 +95,47 @@ def init_model(self, config, fidelity=None, rng=None): random_state=rng ) return model + + +class NNSearchSpace0Benchmark(NNBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # black-box setting (full fidelity) + NNSearchSpace0Benchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') + ) + return fidelity_space + + +class NNSearchSpace1Benchmark(NNBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - iterations + NNSearchSpace1Benchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') + ) + return fidelity_space + + +class NNSearchSpace2Benchmark(NNBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - subsample + NNSearchSpace2Benchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='variable') + ) + return fidelity_space + + +class NNSearchSpace3Benchmark(NNBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - iterations + data subsample + NNSearchSpace3Benchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + ) + return fidelity_space + + +__all__ = [NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, + NNSearchSpace2Benchmark, NNSearchSpace3Benchmark] diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 70e02bdb..0ae819a6 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -1,25 +1,21 @@ -import numpy as np -import ConfigSpace as CS -from typing import Union from copy import deepcopy +from typing import Union, Tuple + +import ConfigSpace as CS +import numpy as np +from ConfigSpace.hyperparameters import Hyperparameter 
from sklearn.ensemble import RandomForestClassifier -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class RandomForestBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(RandomForestBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, data_path - ) - pass +class RandomForestBaseBenchmark(MLBenchmark): + def __init__(self, + task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + super(RandomForestBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed=None): @@ -44,7 +40,7 @@ def get_configuration_space(seed=None): return cs @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -56,37 +52,30 @@ def get_fidelity_space(seed=None, fidelity_choice=1): If fidelity_choice is >2 Fidelity space is multi-multi fidelity, all possible fidelities """ - z_cs = CS.ConfigurationSpace(seed=seed) + raise NotImplementedError() + + @staticmethod + def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + + assert n_estimators_choice in ['fixed', 'variable'] + assert subsample_choice in ['fixed', 'variable'] + fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=100), + fixed=CS.Constant('n_estimators', value=100), # TODO: is the default value here 100 or 512? 
variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) + fidelity2 = dict( fixed=CS.Constant('subsample', value=1), variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - ntrees = fidelity1["fixed"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 1: - # gray-box setting (multi-fidelity) - ntrees - ntrees = fidelity1["variable"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 2: - # gray-box setting (multi-fidelity) - data subsample - ntrees = fidelity1["fixed"] - subsample = fidelity2["variable"] - else: - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - ntrees = fidelity1["variable"] - subsample = fidelity2["variable"] - z_cs.add_hyperparameters([ntrees, subsample]) - return z_cs + n_estimators = fidelity1[n_estimators_choice] + subsample = fidelity2[subsample_choice] + return n_estimators, subsample def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity @@ -102,3 +91,47 @@ def init_model(self, config, fidelity=None, rng=None): random_state=rng ) return model + + +class RandomForestSearchSpace0Benchmark(RandomForestBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # black-box setting (full fidelity) + RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + ) + return fidelity_space + + +class RandomForestSearchSpace1Benchmark(RandomForestBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - ntrees + RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + ) + return fidelity_space + + +class RandomForestSearchSpace2Benchmark(RandomForestBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - data subsample + RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='variable') + ) + return fidelity_space + + +class RandomForestSearchSpace3Benchmark(RandomForestBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + ) + return fidelity_space + + +__all__ = [RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, + RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark] diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index fe1afb66..a61515f5 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -1,21 +1,21 @@ -import ConfigSpace as CS from typing import Union +import ConfigSpace as CS +import numpy as np +from 
ConfigSpace.hyperparameters import Hyperparameter from sklearn.svm import SVC -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class SVMBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(SVMBenchmark, self).__init__(task_id, seed, valid_size, fidelity_choice, data_path) +class SVMBaseBenchmark(MLBenchmark): + def __init__(self, + task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + super(SVMBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + self.cache_size = 200 @staticmethod @@ -35,31 +35,32 @@ def get_configuration_space(seed=None): return cs @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=None): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions For SVM, only a single fidelity exists, i.e., subsample fraction. if fidelity_choice == 0 uses the entire data (subsample=1), reflecting the black-box setup else - parameterizes the fraction of data to subsample + parameterize the fraction of data to subsample """ - z_cs = CS.ConfigurationSpace(seed=seed) + raise NotImplementedError() + + @staticmethod + def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: + + assert subsample_choice in ['fixed', 'variable'] + fidelity = dict( fixed=CS.Constant('subsample', value=1), variable=CS.UniformFloatHyperparameter( 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - subsample = fidelity["fixed"] - else: - # gray-box setting (multi-fidelity) - data subsample - subsample = fidelity["variable"] - z_cs.add_hyperparameter(subsample) - return z_cs + subsample = fidelity[subsample_choice] + + return subsample def init_model(self, config, fidelity=None, rng=None): # initializing model @@ -71,3 +72,23 @@ def init_model(self, config, fidelity=None, rng=None): cache_size=self.cache_size ) return model + + +class SVMSearchSpace0Benchmark(SVMBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameter( + # uses the entire data (subsample=1), reflecting the black-box setup + SVMBaseBenchmark._get_fidelity_choices(subsample_choice='fixed') + ) + return fidelity_space + + +class SVMSearchSpace1Benchmark(SVMBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameter( + # parameterize the fraction of data to subsample + SVMBaseBenchmark._get_fidelity_choices(subsample_choice='fixed') + ) + return fidelity_space diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index 0fe3f07c..ca395f92 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -1,25 +1,20 @@ -import numpy as np -import ConfigSpace as CS -from typing import Union, Dict +from typing import Union, Tuple +import ConfigSpace as CS +import numpy as np import 
xgboost as xgb +from ConfigSpace.hyperparameters import Hyperparameter -from hpobench.benchmarks.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class XGBoostBenchmark(MLBenchmark): - def __init__( - self, - task_id: Union[int, None] = None, - seed: Union[int, None] = None, # Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - fidelity_choice: int = 1, - data_path: Union[str, None] = None - ): - super(XGBoostBenchmark, self).__init__( - task_id, seed, valid_size, fidelity_choice, data_path - ) - pass +class XGBoostBaseBenchmark(MLBenchmark): + def __init__(self, + task_id: Union[int, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + valid_size: float = 0.33, + data_path: Union[str, None] = None): + super(XGBoostBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed=None): @@ -44,7 +39,7 @@ def get_configuration_space(seed=None): return cs @staticmethod - def get_fidelity_space(seed=None, fidelity_choice=1): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 @@ -56,9 +51,16 @@ def get_fidelity_space(seed=None, fidelity_choice=1): If fidelity_choice is >2 Fidelity space is multi-multi fidelity, all possible fidelities """ - z_cs = CS.ConfigurationSpace(seed=seed) + raise NotImplementedError() + + @staticmethod + def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + + assert n_estimators_choice in ['fixed', 'variable'] + assert subsample_choice in ['fixed', 'variable'] + fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=100), + fixed=CS.Constant('n_estimators', value=100), # TODO: Should this be 1000 or 100? 
variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=50, upper=2000, default_value=1000, log=False ) @@ -69,24 +71,10 @@ def get_fidelity_space(seed=None, fidelity_choice=1): 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - if fidelity_choice == 0: - # black-box setting (full fidelity) - ntrees = fidelity1["fixed"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 1: - # gray-box setting (multi-fidelity) - ntrees - ntrees = fidelity1["variable"] - subsample = fidelity2["fixed"] - elif fidelity_choice == 2: - # gray-box setting (multi-fidelity) - data subsample - ntrees = fidelity1["fixed"] - subsample = fidelity2["variable"] - else: - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - ntrees = fidelity1["variable"] - subsample = fidelity2["variable"] - z_cs.add_hyperparameters([ntrees, subsample]) - return z_cs + + n_estimators = fidelity1[n_estimators_choice] + subsample = fidelity2[subsample_choice] + return n_estimators, subsample def init_model(self, config, fidelity=None, rng=None): """ Function that returns the model initialized based on the configuration and fidelity @@ -108,3 +96,47 @@ def init_model(self, config, fidelity=None, rng=None): **extra_args ) return model + + +class XGBoostSearchSpace0Benchmark(XGBoostBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # black-box setting (full fidelity) + XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + ) + return fidelity_space + + +class XGBoostSearchSpace1Benchmark(XGBoostBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - ntrees + XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + ) + return fidelity_space + + +class XGBoostSearchSpace2Benchmark(XGBoostBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-fidelity) - data subsample + XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='variable') + ) + return fidelity_space + + +class XGBoostSearchSpace3Benchmark(XGBoostBaseBenchmark): + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + ) + return fidelity_space + + +__all__ = [XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, + XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark] From e57fbcbb52bdba3097f7456673e6c1da04b5e168 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 18:52:24 +0200 Subject: [PATCH 55/95] Flake + Pep --- hpobench/dependencies/ml/data_manager.py | 2 +- hpobench/dependencies/ml/ml_benchmark_template.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py index 244cd0cf..55210933 100644 --- 
a/hpobench/dependencies/ml/data_manager.py +++ b/hpobench/dependencies/ml/data_manager.py @@ -99,7 +99,7 @@ def try_to_load_data(self, data_path: Path) -> bool: return True def __download_data(self, valid_size: Union[int, float, None], verbose: bool): - self.logger.info(f'Start to download the OpenML dataset') + self.logger.info('Start to download the OpenML dataset') # loads full data X, y, categorical_ind, feature_names = self.dataset.get_data(target=self.task.target_name, diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index c773e830..41be449a 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -1,12 +1,10 @@ -import os import time -import openml -import numpy as np -import pandas as pd -import ConfigSpace as CS -from typing import Union, Dict from pathlib import Path +from typing import Union, Dict +import ConfigSpace as CS +import numpy as np +import pandas as pd from sklearn.metrics import make_scorer, accuracy_score, balanced_accuracy_score, \ precision_score, f1_score @@ -89,7 +87,7 @@ def get_configuration_space(seed=None): raise NotImplementedError() @staticmethod - def get_fidelity_space(seed=None): + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions If fidelity_choice is 0 From f6131ea29e49a6a6ec3511ecc4d368f346299ed0 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 19:14:38 +0200 Subject: [PATCH 56/95] Add Container Interface --- hpobench/benchmarks/{ml => ml_mmfb}/README.md | 0 hpobench/benchmarks/ml_mmfb/__init__.py | 0 .../{ml => ml_mmfb}/histgb_benchmark.py | 0 .../{ml => ml_mmfb}/lr_benchmark.py | 0 .../{ml => ml_mmfb}/nn_benchmark.py | 0 .../{ml => ml_mmfb}/rf_benchmark.py | 0 .../{ml => ml_mmfb}/svm_benchmark.py | 0 .../{ml => ml_mmfb}/tabular_benchmark.py | 0 .../{ml => ml_mmfb}/xgboost_benchmark.py | 0 .../container/benchmarks/ml_mmfb/__init__.py | 0 .../benchmarks/ml_mmfb/histgb_benchmark.py | 42 +++++++++++++++++++ .../benchmarks/ml_mmfb/lr_benchmark.py | 42 +++++++++++++++++++ .../benchmarks/ml_mmfb/nn_benchmark.py | 42 +++++++++++++++++++ .../benchmarks/ml_mmfb/rf_benchmark.py | 42 +++++++++++++++++++ .../benchmarks/ml_mmfb/svm_benchmark.py | 25 +++++++++++ .../benchmarks/ml_mmfb/tabular_benchmark.py | 25 +++++++++++ .../benchmarks/ml_mmfb/xgboost_benchmark.py | 42 +++++++++++++++++++ 17 files changed, 260 insertions(+) rename hpobench/benchmarks/{ml => ml_mmfb}/README.md (100%) create mode 100644 hpobench/benchmarks/ml_mmfb/__init__.py rename hpobench/benchmarks/{ml => ml_mmfb}/histgb_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/lr_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/nn_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/rf_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/svm_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/tabular_benchmark.py (100%) rename hpobench/benchmarks/{ml => ml_mmfb}/xgboost_benchmark.py (100%) create mode 100644 hpobench/container/benchmarks/ml_mmfb/__init__.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py create mode 100644 
hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py create mode 100644 hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py diff --git a/hpobench/benchmarks/ml/README.md b/hpobench/benchmarks/ml_mmfb/README.md similarity index 100% rename from hpobench/benchmarks/ml/README.md rename to hpobench/benchmarks/ml_mmfb/README.md diff --git a/hpobench/benchmarks/ml_mmfb/__init__.py b/hpobench/benchmarks/ml_mmfb/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hpobench/benchmarks/ml/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/histgb_benchmark.py rename to hpobench/benchmarks/ml_mmfb/histgb_benchmark.py diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/lr_benchmark.py rename to hpobench/benchmarks/ml_mmfb/lr_benchmark.py diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/nn_benchmark.py rename to hpobench/benchmarks/ml_mmfb/nn_benchmark.py diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/rf_benchmark.py rename to hpobench/benchmarks/ml_mmfb/rf_benchmark.py diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/svm_benchmark.py rename to hpobench/benchmarks/ml_mmfb/svm_benchmark.py diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/tabular_benchmark.py rename to hpobench/benchmarks/ml_mmfb/tabular_benchmark.py diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py similarity index 100% rename from hpobench/benchmarks/ml/xgboost_benchmark.py rename to hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py diff --git a/hpobench/container/benchmarks/ml_mmfb/__init__.py b/hpobench/container/benchmarks/ml_mmfb/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py new file mode 100644 index 00000000..77ed4bbb --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the HistGB Benchmarks from hpobench/benchmarks/ml_mmfb/histgb_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class HistGBSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace0Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(HistGBSearchSpace0Benchmark, self).__init__(**kwargs) + + +class HistGBSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + 
super(HistGBSearchSpace1Benchmark, self).__init__(**kwargs) + + +class HistGBSearchSpace2Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace2Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(HistGBSearchSpace2Benchmark, self).__init__(**kwargs) + + +class HistGBSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace3Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(HistGBSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, + HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py new file mode 100644 index 00000000..fd1b4015 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the learning rate Benchmarks from hpobench/benchmarks/ml_mmfb/lr_benchmarks.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class LRSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace0Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(LRSearchSpace0Benchmark, self).__init__(**kwargs) + + +class LRSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(LRSearchSpace1Benchmark, self).__init__(**kwargs) + + +class LRSearchSpace2Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace2Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(LRSearchSpace2Benchmark, self).__init__(**kwargs) + + +class LRSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace3Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(LRSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = [LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, + LRSearchSpace2Benchmark, LRSearchSpace3Benchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py new file mode 100644 index 00000000..818fb606 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the Neural Network Benchmarks from hpobench/benchmarks/ml_mmfb/nn_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class 
NNSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace0Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(NNSearchSpace0Benchmark, self).__init__(**kwargs) + + +class NNSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(NNSearchSpace1Benchmark, self).__init__(**kwargs) + + +class NNSearchSpace2Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace2Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(NNSearchSpace2Benchmark, self).__init__(**kwargs) + + +class NNSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace3Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(NNSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = [NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, + NNSearchSpace2Benchmark, NNSearchSpace3Benchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py new file mode 100644 index 00000000..3c7ced83 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the Random Forest Benchmarks from hpobench/benchmarks/ml_mmfb/rf_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class RandomForestSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace0Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(RandomForestSearchSpace0Benchmark, self).__init__(**kwargs) + + +class RandomForestSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(RandomForestSearchSpace1Benchmark, self).__init__(**kwargs) + + +class RandomForestSearchSpace2Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace2Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(RandomForestSearchSpace2Benchmark, self).__init__(**kwargs) + + +class RandomForestSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace3Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') 
+ super(RandomForestSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = [RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, + RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py new file mode 100644 index 00000000..b2c46e75 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the SVM Benchmarks from hpobench/benchmarks/ml_mmfb/svm_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class SVMSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMSearchSpace0Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(SVMSearchSpace0Benchmark, self).__init__(**kwargs) + + +class SVMSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(SVMSearchSpace1Benchmark, self).__init__(**kwargs) + + +__all__ = [SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py new file mode 100644 index 00000000..f4a855d5 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the Tabular Benchmarks from hpobench/benchmarks/ml_mmfb/tabular_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class TabularBenchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'TabularBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_tabular_benchmarks') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(TabularBenchmark, self).__init__(**kwargs) + + +class OriginalTabularBenchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'OriginalTabularBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_tabular_benchmarks') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(OriginalTabularBenchmark, self).__init__(**kwargs) + + +__all__ = [TabularBenchmark, OriginalTabularBenchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py new file mode 100644 index 00000000..72438d37 --- /dev/null +++ b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the XGB Benchmarks from hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class XGBoostSearchSpace0Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace0Benchmark') + kwargs['container_name'] = 
kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostSearchSpace0Benchmark, self).__init__(**kwargs) + + +class XGBoostSearchSpace1Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace1Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostSearchSpace1Benchmark, self).__init__(**kwargs) + + +class XGBoostSearchSpace2Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace2Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostSearchSpace2Benchmark, self).__init__(**kwargs) + + +class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace3Benchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = [XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, + XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark] From 36bc391c2e06c25718ae3f5ac43c957a30054b9f Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 21:56:06 +0200 Subject: [PATCH 57/95] Mark `task_id` as required. --- extra_requirements/ml.json | 3 --- extra_requirements/ml_mfbb.json | 4 ++++ hpobench/benchmarks/ml_mmfb/entry_point.py | 12 ++++++++++++ hpobench/benchmarks/ml_mmfb/histgb_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py | 2 +- hpobench/dependencies/ml/ml_benchmark_template.py | 2 +- 10 files changed, 23 insertions(+), 10 deletions(-) delete mode 100644 extra_requirements/ml.json create mode 100644 extra_requirements/ml_mfbb.json create mode 100644 hpobench/benchmarks/ml_mmfb/entry_point.py diff --git a/extra_requirements/ml.json b/extra_requirements/ml.json deleted file mode 100644 index 8a68761f..00000000 --- a/extra_requirements/ml.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "ml_tabular_benchmarks": ["pandas>=1.0.0"] -} \ No newline at end of file diff --git a/extra_requirements/ml_mfbb.json b/extra_requirements/ml_mfbb.json new file mode 100644 index 00000000..68b4a557 --- /dev/null +++ b/extra_requirements/ml_mfbb.json @@ -0,0 +1,4 @@ +{ + "ml_tabular_benchmarks": ["pandas==1.2.4"], + "ml_mfbb": ["pandas==1.2.4","sklearn==0.24.2"] +} \ No newline at end of file diff --git a/hpobench/benchmarks/ml_mmfb/entry_point.py b/hpobench/benchmarks/ml_mmfb/entry_point.py new file mode 100644 index 00000000..4ec917a6 --- /dev/null +++ b/hpobench/benchmarks/ml_mmfb/entry_point.py @@ -0,0 +1,12 @@ +from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, \ + HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark +from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, \ + LRSearchSpace2Benchmark, LRSearchSpace3Benchmark +from hpobench.benchmarks.ml_mmfb.nn_benchmark import 
NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, \ + NNSearchSpace2Benchmark, NNSearchSpace3Benchmark +from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestSearchSpace0Benchmark, \ + RandomForestSearchSpace1Benchmark, RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark +from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark +from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark, OriginalTabularBenchmark +from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark,\ + XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 929f2bbf..442476ed 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -12,7 +12,7 @@ class HistGBBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index a7e1f857..ff85629d 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -10,7 +10,7 @@ class LRBaseBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index fd6e0a51..6c3344f9 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -11,7 +11,7 @@ class NNBaseBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index 0ae819a6..fca50eb7 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -11,7 +11,7 @@ class RandomForestBaseBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index a61515f5..fa8e324d 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -10,7 +10,7 @@ class SVMBaseBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index ca395f92..d77b0938 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -10,7 +10,7 @@ class XGBoostBaseBenchmark(MLBenchmark): def __init__(self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: 
float = 0.33, data_path: Union[str, None] = None): diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index 41be449a..59e348d6 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -31,7 +31,7 @@ class MLBenchmark(AbstractBenchmark): def __init__( self, - task_id: Union[int, None] = None, + task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, Path, None] = None, From a5c7d6200a517a1bf1f11325b3300df082cecaaa Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 22:33:13 +0200 Subject: [PATCH 58/95] Adapt Interfaces --- .../benchmarks/ml_mmfb/histgb_benchmark.py | 8 ++--- hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 8 ++--- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 8 ++--- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 8 ++--- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 8 ++--- .../benchmarks/ml_mmfb/tabular_benchmark.py | 12 +++---- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 17 +++++++--- .../dependencies/ml/ml_benchmark_template.py | 31 ++++++++++++------- 8 files changed, 59 insertions(+), 41 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 442476ed..50b4fe5b 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -1,11 +1,11 @@ -from typing import Union, Tuple +from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np from ConfigSpace.hyperparameters import Hyperparameter -from sklearn.ensemble import HistGradientBoostingClassifier # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html from sklearn.experimental import enable_hist_gradient_boosting # noqa +from sklearn.ensemble import HistGradientBoostingClassifier from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark @@ -77,12 +77,12 @@ def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str) -> Tuple[Hy subsample = fidelity2[subsample_choice] return ntrees, subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng model = HistGradientBoostingClassifier( - **config.get_dictionary(), + **config, max_iter=fidelity['n_estimators'], # a fidelity being used during initialization early_stopping=False, random_state=rng diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index ff85629d..944e77c4 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np @@ -19,7 +19,7 @@ def __init__(self, self.cache_size = 500 # TODO: Do we need this? 
@staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) @@ -68,12 +68,12 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): # initializing model rng = self.rng if rng is None else rng # https://scikit-learn.org/stable/modules/sgd.html model = SGDClassifier( - **config.get_dictionary(), + **config, loss="log", # performs Logistic Regression max_iter=fidelity["iter"], learning_rate="adaptive", diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index 6c3344f9..4826f7fe 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -1,5 +1,5 @@ from copy import deepcopy -from typing import Union, Tuple +from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np @@ -18,7 +18,7 @@ def __init__(self, super(NNBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) @@ -76,11 +76,11 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng - config = deepcopy(config.get_dictionary()) + config = deepcopy(config) depth = config["depth"] width = config["width"] config.pop("depth") diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index fca50eb7..b17f74d1 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -1,5 +1,5 @@ from copy import deepcopy -from typing import Union, Tuple +from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np @@ -18,7 +18,7 @@ def __init__(self, super(RandomForestBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) @@ -77,11 +77,11 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng - config = deepcopy(config.get_dictionary()) + config = deepcopy(config) n_features = 
self.train_X.shape[1] config["max_features"] = int(np.rint(np.power(n_features, config["max_features"]))) model = RandomForestClassifier( diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index fa8e324d..bc439fed 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import Union, Dict import ConfigSpace as CS import numpy as np @@ -19,7 +19,7 @@ def __init__(self, self.cache_size = 200 @staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) @@ -62,10 +62,10 @@ def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: return subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): # initializing model rng = self.rng if rng is None else rng - config = config.get_dictionary() + config = config model = SVC( **config, random_state=rng, diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index dd07ec02..9c8e0739 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -67,7 +67,7 @@ def get_meta_information(self) -> Dict: 'model': self.model } - def _preprocess_configspace(self, config_space): + def _preprocess_configspace(self, config_space: CS.ConfigurationSpace) -> CS.ConfigurationSpace: """ Converts floats to np.float32 """ for hp in config_space.get_hyperparameters(): hp.sequence = tuple(np.array(hp.sequence).astype(np.float32)) @@ -83,7 +83,7 @@ def _total_number_of_configurations(self, space: str = "hyperparameters") -> int count *= len(hp.sequence) return count - def _seeds_used(self): + def _seeds_used(self) -> List: return self.table.seed.unique().tolist() def sample_hyperparamer(self, n: int = 1) -> Union[CS.Configuration, List]: @@ -105,7 +105,7 @@ def get_max_fidelity(self) -> Dict: max_fidelity[hp.name] = np.sort(hp.sequence)[-1] return max_fidelity - def get_fidelity_range(self): + def get_fidelity_range(self) -> List: fidelities = [] for hp in self.fidelity_space.get_hyperparameters(): if not isinstance(hp, CS.Constant) and len(hp.sequence) > 1: @@ -126,11 +126,11 @@ def _search_dataframe(self, row_dict, df): def _objective( self, - config: CS.Configuration, - fidelity: CS.Configuration, + config: Dict, + fidelity: Dict, seed: Union[int, None] = None, metric: Union[str, None] = "acc", - evaluation: Union[str] = "" + evaluation: Union[str, None] = "" ) -> Dict: metric_str = ', '.join(list(metrics.keys)) diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index d77b0938..a5735b2c 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np @@ -17,7 +17,7 @@ def __init__(self, super(XGBoostBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized 
--- contains the hyperparameters """ cs = CS.ConfigurationSpace(seed=seed) @@ -76,9 +76,18 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ + if isinstance(config, CS.Configuration): + config = config.get_dictionary() + if isinstance(fidelity, CS.Configuration): + fidelity = fidelity.get_dictionary() + + # TODO: This seems to be wrong. (AND-condition) rng = rng if (rng is None and isinstance(rng, int)) else self.seed extra_args = dict( booster="gbtree", @@ -92,7 +101,7 @@ def init_model(self, config, fidelity=None, rng=None): extra_args.update({"num_class": self.n_classes}) model = xgb.XGBClassifier( - **config.get_dictionary(), + **config, **extra_args ) return model diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index 59e348d6..ff2ba55e 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -1,6 +1,6 @@ import time from pathlib import Path -from typing import Union, Dict +from typing import Union, Dict, Iterable import ConfigSpace as CS import numpy as np @@ -10,6 +10,7 @@ from hpobench.abstract_benchmark import AbstractBenchmark from hpobench.dependencies.ml.data_manager import OpenMLDataManager +from hpobench.util.rng_helper import get_rng metrics = dict( acc=accuracy_score, @@ -81,7 +82,7 @@ def __init__( self.configuration_space = self.get_configuration_space(self.seed) @staticmethod - def get_configuration_space(seed=None): + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters """ raise NotImplementedError() @@ -112,32 +113,40 @@ def get_meta_information(self): 'task_id': self.task_id } - def init_model(self, config, fidelity=None, rng=None): + def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ raise NotImplementedError() - def get_config(self, size=None): + def get_config(self, size: Union[int, None] = None): """Samples configuration(s) from the (hyper) parameter space """ if size is None: # return only one config return self.configuration_space.sample_configuration() return [self.configuration_space.sample_configuration() for i in range(size)] - def get_fidelity(self, size=None): + def get_fidelity(self, size: Union[int, None] = None): """Samples candidate fidelities from the fidelity space """ if size is None: # return only one config return self.fidelity_space.sample_configuration() return [self.fidelity_space.sample_configuration() for i in range(size)] - def shuffle_data_idx(self, train_idx=None, rng=None): + def shuffle_data_idx(self, train_idx: Iterable = None, rng: Union[np.random.RandomState, None] = None) -> Iterable: rng = self.rng if rng is None else rng train_idx = self.train_idx if train_idx is None else train_idx rng.shuffle(train_idx) return train_idx - def _train_objective(self, config, fidelity, shuffle, rng, evaluation="valid"): + def _train_objective(self, 
config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid"): + + if rng is not None: + rng = get_rng(rng, self.rng) + # initializing model model = self.init_model(config, fidelity, rng) @@ -226,8 +235,8 @@ def objective_function(self, 'test_scores': test_scores, 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() + 'fidelity': fidelity, + 'config': configuration, } return { @@ -269,8 +278,8 @@ def objective_function_test(self, 'test_scores': test_scores, 'test_costs': test_score_cost, # storing as dictionary and not ConfigSpace saves tremendous memory - 'fidelity': fidelity.get_dictionary(), - 'config': configuration.get_dictionary() + 'fidelity': fidelity, + 'config': configuration, } return { From c5f6979926cef5dc06998f36a42807708156c07b Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 23:42:36 +0200 Subject: [PATCH 59/95] Fix minor errors. --- .../benchmarks/ml_mmfb/histgb_benchmark.py | 6 ++- .../benchmarks/ml_mmfb/tabular_benchmark.py | 49 ++++++++++++++----- .../dependencies/ml/ml_benchmark_template.py | 3 +- hpobench/util/data_manager.py | 15 +++--- 4 files changed, 50 insertions(+), 23 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 50b4fe5b..9507d81f 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -27,11 +27,13 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp CS.UniformIntegerHyperparameter( 'max_depth', lower=6, upper=30, default_value=6, log=True ), + # TODO: The parameter max_leaf_node is not accepted. Changed it from max_leaf_node to max_leaf_nodes CS.UniformIntegerHyperparameter( - 'max_leaf_node', lower=2, upper=64, default_value=32, log=True + 'max_leaf_nodes', lower=2, upper=64, default_value=32, log=True ), + # TODO: The parameter eta is not accepted. Do you mean learning_rate? 
Changed it from eta to learning_rate CS.UniformFloatHyperparameter( - 'eta', lower=2**-10, upper=1, default_value=0.1, log=True + 'learning_rate', lower=2**-10, upper=1, default_value=0.1, log=True ), CS.UniformFloatHyperparameter( 'l2_regularization', lower=2**-10, upper=2**10, default_value=0.1, log=True diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index 9c8e0739..0225b361 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -12,20 +12,25 @@ class BaseTabularBenchmark(AbstractBenchmark): - def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, + def __init__(self, + model: str, task_id: int, + data_dir: Union[Path, str, None] = None, rng: Union[int, np.random.RandomState, None] = None, **kwargs): - super(BaseTabularBenchmark, self).__init__(rng=rng, **kwargs) + assert model in ['lr', 'svm', 'xgb'], f'Parameter `model` has to be one of [lr, svm, xgb] but was {model}' self.task_id = task_id self.model = model - self.table, self.metadata = TabularDataManager(model, task_id, data_dir) + self.dm = TabularDataManager(model, task_id, data_dir) + self.table, self.metadata = self.dm.load() self.exp_args = self.metadata["exp_args"] self.config_spaces = self.metadata["config_spaces"] self.global_minimums = self.metadata["global_min"] + super(BaseTabularBenchmark, self).__init__(rng=rng, **kwargs) + @AbstractBenchmark.check_parameters def objective_function(self, configuration: Union[CS.Configuration, Dict], @@ -113,16 +118,33 @@ def get_fidelity_range(self) -> List: return fidelities def _search_dataframe(self, row_dict, df): - # https://stackoverflow.com/a/46165056/8363967 - mask = np.array([True] * df.shape[0]) - for i, param in enumerate(df.drop("result", axis=1).columns): - mask *= df[param].values == row_dict[param] - idx = np.where(mask) - if len(idx) != 1: + query_stmt = self._build_query(row_dict) + result = df.query(query_stmt) + # TODO: What happens in this case? The objective function raises a TypeError. + if len(result) == 0: return None - idx = idx[0][0] - result = df.iloc[idx]["result"] - return result + return result.iloc[0].loc['result'] + + # TODO: This created an out-of-bounds error. The idx mask should have been 2d, but was 1d. + # # https://stackoverflow.com/a/46165056/8363967 + # mask = np.array([True] * df.shape[0]) + # for i, param in enumerate(df.drop("result", axis=1).columns): + # mask *= df[param].values == row_dict[param] + # idx = np.where(mask) + # if len(idx) != 1: + # return None + # idx = idx[0][0] + # result = df.iloc[idx]["result"] + # return result + + @staticmethod + def _build_query(row_dict: Dict) -> str: + query = '' + for i, (param_name, param_value) in enumerate(row_dict.items()): + if i != 0: + query += ' & ' + query += f'{param_name} == {param_value}' + return query def _objective( self, @@ -133,12 +155,13 @@ def _objective( evaluation: Union[str, None] = "" ) -> Dict: - metric_str = ', '.join(list(metrics.keys)) + metric_str = ', '.join(list(metrics.keys())) assert metric in list(metrics.keys()), f"metric not found among: {metric_str}" score_key = f"{evaluation}_scores" cost_key = f"{evaluation}_scores" key_path = dict() + # TODO: Dicts are unordered. This does not have to have an effect. 
for name in np.sort(self.configuration_space.get_hyperparameter_names()): key_path[str(name)] = config[str(name)] for name in np.sort(self.fidelity_space.get_hyperparameter_names()): diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index ff2ba55e..d256c081 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -138,7 +138,8 @@ def shuffle_data_idx(self, train_idx: Iterable = None, rng: Union[np.random.Rand rng.shuffle(train_idx) return train_idx - def _train_objective(self, config: Dict, + def _train_objective(self, + config: Dict, fidelity: Dict, shuffle: bool, rng: Union[np.random.RandomState, int, None] = None, diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 9e6f8fb9..258f7d56 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -93,7 +93,7 @@ def _download_file_with_progressbar(self, data_url: str, data_file: Path): if chunk: _ = f.write(chunk) f.flush() - self.logger.info("Finished downloading") + self.logger.info(f"Finished downloading to {data_file}") @lockutils.synchronized('not_thread_process_safe', external=True, lock_path=f'{hpobench.config_file.cache_dir}/lock_unzip_file', delay=0.5) @@ -929,20 +929,21 @@ class TabularDataManager(DataManager): def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None): super(TabularDataManager, self).__init__() - assert model in ['lr', 'svm'] + assert model in ['lr', 'svm', 'xgb'], f'Model has to be one of [lr, svm, xgb] but was {model}' self.model = model self.task_id = str(task_id) - url_svm = 'https://figshare.com/s/5a0929ad9b2ccd8dda58' - url_lr = 'https://ndownloader.figshare.com/files/29027112?private_link=d644493a93dbab4b4ee1' + url_dict = dict(xgb='https://ndownloader.figshare.com/files/29113257?private_link=c817bed4e7efc6daee91', + svm='https://ndownloader.figshare.com/files/29102307?private_link=5a0929ad9b2ccd8dda58', + lr='https://ndownloader.figshare.com/files/29027112?private_link=d644493a93dbab4b4ee1') - self.url_to_use = url_svm if model == 'svm' else url_lr + self.url_to_use = url_dict.get(model) if data_dir is None: - data_dir = hpobench.config_file.data_dir / "TabularData" + data_dir = hpobench.config_file.data_dir - self._save_dir = Path(data_dir) + self._save_dir = Path(data_dir) / "TabularData" / self.model self.create_save_directory(self._save_dir) self.parquet_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_data.parquet.gzip' From 48af58decdc329924ff6425f7193551cf3fa0d10 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 16 Aug 2021 23:45:17 +0200 Subject: [PATCH 60/95] Fix minor errors. 
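The entry point now lists every ml_mmfb benchmark in `__all__`, so all classes can be imported from a single module. A minimal usage sketch (illustration only, not part of this change; the OpenML task id is arbitrary and the calls assume the task data can be fetched on first use):

    # Sketch only: class names as exported by hpobench/benchmarks/ml_mmfb/entry_point.py
    # at this point in the series; task id 31 is illustrative, any valid OpenML task id works.
    from hpobench.benchmarks.ml_mmfb.entry_point import XGBoostSearchSpace1Benchmark

    benchmark = XGBoostSearchSpace1Benchmark(task_id=31)
    config = benchmark.get_config()       # sample a hyperparameter configuration
    fidelity = benchmark.get_fidelity()   # sample a fidelity (n_estimators for this variant)
    print(benchmark.objective_function(configuration=config, fidelity=fidelity))
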
--- hpobench/benchmarks/ml_mmfb/entry_point.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hpobench/benchmarks/ml_mmfb/entry_point.py b/hpobench/benchmarks/ml_mmfb/entry_point.py index 4ec917a6..1b380a51 100644 --- a/hpobench/benchmarks/ml_mmfb/entry_point.py +++ b/hpobench/benchmarks/ml_mmfb/entry_point.py @@ -10,3 +10,15 @@ from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark, OriginalTabularBenchmark from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark,\ XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark + + +__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, HistGBSearchSpace2Benchmark, + HistGBSearchSpace3Benchmark, + LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, LRSearchSpace2Benchmark, LRSearchSpace3Benchmark, + NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, NNSearchSpace2Benchmark, NNSearchSpace3Benchmark, + RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, RandomForestSearchSpace2Benchmark, + RandomForestSearchSpace3Benchmark, + SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark, + TabularBenchmark, OriginalTabularBenchmark, + XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, XGBoostSearchSpace2Benchmark, + XGBoostSearchSpace3Benchmark] From cf24488d0aabe0bac71eecd71b010b65f3b76491 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 00:05:18 +0200 Subject: [PATCH 61/95] Pylint --- hpobench/benchmarks/ml_mmfb/tabular_benchmark.py | 2 ++ hpobench/dependencies/ml/data_manager.py | 1 + hpobench/util/data_manager.py | 2 ++ 3 files changed, 5 insertions(+) diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index 0225b361..907b5e51 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -31,6 +31,7 @@ def __init__(self, super(BaseTabularBenchmark, self).__init__(rng=rng, **kwargs) + # pylint: disable=arguments-differ @AbstractBenchmark.check_parameters def objective_function(self, configuration: Union[CS.Configuration, Dict], @@ -43,6 +44,7 @@ def objective_function(self, result = self._objective(configuration, fidelity, seed, metric, evaluation="val") return result + # pylint: disable=arguments-differ @AbstractBenchmark.check_parameters def objective_function_test(self, configuration: Union[CS.Configuration, Dict], diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py index 55210933..84d8b587 100644 --- a/hpobench/dependencies/ml/data_manager.py +++ b/hpobench/dependencies/ml/data_manager.py @@ -53,6 +53,7 @@ def __init__(self, task_id: int, super(OpenMLDataManager, self).__init__() + # pylint: disable=arguments-differ @lockutils.synchronized('not_thread_process_safe', external=True, lock_path=f'{config_file.cache_dir}/openml_dm_lock', delay=0.2) def load(self, valid_size=None, verbose=False): diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 258f7d56..d390218e 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -646,6 +646,7 @@ def _check_availability_and_download(self): f.flush() self.logger.info("Finished downloading") + # pylint: disable=arguments-differ @lockutils.synchronized('not_thread_process_safe', external=True, lock_path=f'{hpobench.config_file.cache_dir}/lock_surrogates_unzip_data', delay=0.5) def _unzip_data(self): @@ -949,6 +950,7 @@ def __init__(self, model: str, 
task_id: [int, str], data_dir: [str, Path, None] self.parquet_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_data.parquet.gzip' self.metadata_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_metadata.json' + # pylint: disable=arguments-differ def load(self): # Can we directly load the files? if self.parquet_file.exists() and self.metadata_file.exists(): From 528dde18d7ff8b211c8ede1b4f2e2f0e67212713 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 16:34:05 +0200 Subject: [PATCH 62/95] Init Model can handle now Configurations --- hpobench/benchmarks/ml_mmfb/histgb_benchmark.py | 10 +++++++++- hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 10 +++++++++- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 10 +++++++++- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 9 ++++++++- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 7 +++++-- hpobench/dependencies/ml/ml_benchmark_template.py | 4 +++- 6 files changed, 43 insertions(+), 7 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 9507d81f..dd36694c 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -79,10 +79,18 @@ def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str) -> Tuple[Hy subsample = fidelity2[subsample_choice] return ntrees, subsample - def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng + + if isinstance(config, CS.Configuration): + config = config.get_dictionary() + if isinstance(fidelity, CS.Configuration): + fidelity = fidelity.get_dictionary() + model = HistGradientBoostingClassifier( **config, max_iter=fidelity['n_estimators'], # a fidelity being used during initialization diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index 944e77c4..5b9d054b 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -68,9 +68,17 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): # initializing model rng = self.rng if rng is None else rng + + if isinstance(config, CS.Configuration): + config = config.get_dictionary() + if isinstance(fidelity, CS.Configuration): + fidelity = fidelity.get_dictionary() + # https://scikit-learn.org/stable/modules/sgd.html model = SGDClassifier( **config, diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index 4826f7fe..601efe23 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -76,10 +76,18 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, 
config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng + + if isinstance(config, CS.Configuration): + config = config.get_dictionary() + if isinstance(fidelity, CS.Configuration): + fidelity = fidelity.get_dictionary() + config = deepcopy(config) depth = config["depth"] width = config["width"] diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index b17f74d1..838d956d 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -77,10 +77,17 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ rng = self.rng if rng is None else rng + if isinstance(config, CS.Configuration): + config = config.get_dictionary() + if isinstance(fidelity, CS.Configuration): + fidelity = fidelity.get_dictionary() + config = deepcopy(config) n_features = self.train_X.shape[1] config["max_features"] = int(np.rint(np.power(n_features, config["max_features"]))) diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index bc439fed..0ae25e18 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -62,10 +62,13 @@ def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: return subsample - def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): # initializing model rng = self.rng if rng is None else rng - config = config + if isinstance(config, CS.Configuration): + config = config.get_dictionary() model = SVC( **config, random_state=rng, diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index d256c081..8460b113 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -113,7 +113,9 @@ def get_meta_information(self): 'task_id': self.task_id } - def init_model(self, config: Dict, fidelity: Dict = None, rng: Union[int, np.random.RandomState, None] = None): + def init_model(self, config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None): """ Function that returns the model initialized based on the configuration and fidelity """ raise NotImplementedError() From 6bdf5c019c6904e8960416b87e6e95e9dbaa9ab1 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:22:32 +0200 Subject: [PATCH 63/95] PR Requests: Rename Classes --- 
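Note for reviewers (kept below the --- so it stays out of the commit message): after this rename the class suffix encodes the fidelity space. The plain `*Benchmark` classes expose both fidelities (the multi-multi-fidelity setting), `*BenchmarkBB` fixes both (the black-box, full-fidelity setting), and `*BenchmarkMF` varies only the model fidelity (e.g. ntrees for HistGB). A quick sanity-check sketch, assuming an arbitrary OpenML task id and that the task data can be fetched:

    # Sketch only: prints the fidelity dimensions of each HistGB variant after the rename.
    from hpobench.benchmarks.ml_mmfb.histgb_benchmark import (
        HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF)

    for cls in (HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF):
        bench = cls(task_id=31)  # task id is illustrative only
        fidelity_space = bench.get_fidelity_space(seed=1)
        print(cls.__name__, fidelity_space.get_hyperparameter_names())
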
hpobench/benchmarks/ml_mmfb/entry_point.py | 33 +++++------- .../benchmarks/ml_mmfb/histgb_benchmark.py | 47 ++++------------ hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 45 ++++++---------- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 54 +++++-------------- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 53 +++++------------- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 33 ++++-------- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 53 +++++------------- .../benchmarks/ml_mmfb/histgb_benchmark.py | 29 ++++------ .../benchmarks/ml_mmfb/lr_benchmark.py | 29 ++++------ .../benchmarks/ml_mmfb/nn_benchmark.py | 29 ++++------ .../benchmarks/ml_mmfb/rf_benchmark.py | 29 ++++------ .../benchmarks/ml_mmfb/svm_benchmark.py | 22 +++++--- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 21 ++++---- 13 files changed, 153 insertions(+), 324 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/entry_point.py b/hpobench/benchmarks/ml_mmfb/entry_point.py index 1b380a51..0114acaa 100644 --- a/hpobench/benchmarks/ml_mmfb/entry_point.py +++ b/hpobench/benchmarks/ml_mmfb/entry_point.py @@ -1,24 +1,17 @@ -from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, \ - HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark -from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, \ - LRSearchSpace2Benchmark, LRSearchSpace3Benchmark -from hpobench.benchmarks.ml_mmfb.nn_benchmark import NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, \ - NNSearchSpace2Benchmark, NNSearchSpace3Benchmark -from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestSearchSpace0Benchmark, \ - RandomForestSearchSpace1Benchmark, RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark -from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark +from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ + RandomForestBenchmarkMF +from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark, OriginalTabularBenchmark -from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark,\ - XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark +from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF -__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, HistGBSearchSpace2Benchmark, - HistGBSearchSpace3Benchmark, - LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, LRSearchSpace2Benchmark, LRSearchSpace3Benchmark, - NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, NNSearchSpace2Benchmark, NNSearchSpace3Benchmark, - RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, RandomForestSearchSpace2Benchmark, - RandomForestSearchSpace3Benchmark, - SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark, +__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF, + LRBenchmark, LRBenchmarkBB, LRBenchmarkMF, + NNBenchmark, NNBenchmarkBB, NNBenchmarkMF, + RandomForestBenchmark, 
RandomForestBenchmarkBB, RandomForestBenchmarkMF, + SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF, TabularBenchmark, OriginalTabularBenchmark, - XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, XGBoostSearchSpace2Benchmark, - XGBoostSearchSpace3Benchmark] + XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index dd36694c..5d164503 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -27,11 +27,9 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp CS.UniformIntegerHyperparameter( 'max_depth', lower=6, upper=30, default_value=6, log=True ), - # TODO: The parameter max_leaf_node is not accepted. Changed it from max_leaf_node to max_leaf_nodes CS.UniformIntegerHyperparameter( 'max_leaf_nodes', lower=2, upper=64, default_value=32, log=True ), - # TODO: The parameter eta is not accepted. Do you mean learning_rate? Changed it from eta to learning_rate CS.UniformFloatHyperparameter( 'learning_rate', lower=2**-10, upper=1, default_value=0.1, log=True ), @@ -43,18 +41,12 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities - """ - raise NotImplementedError() + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + HistGBBenchmark._get_fidelity_choices(ntrees_choice='variable', subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(ntrees_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: @@ -100,7 +92,7 @@ def init_model(self, config: Union[CS.Configuration, Dict], return model -class HistGBSearchSpace0Benchmark(HistGBBenchmark): +class HistGBBenchmarkBB(HistGBBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( @@ -110,7 +102,7 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -class HistGBSearchSpace1Benchmark(HistGBBenchmark): +class HistGBBenchmarkMF(HistGBBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( @@ -120,25 +112,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -class HistGBSearchSpace2Benchmark(HistGBBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-fidelity) - subsample - HistGBBenchmark._get_fidelity_choices(ntrees_choice='fixed', 
subsample_choice='variable') - ) - return fidelity_space - - -class HistGBSearchSpace3Benchmark(HistGBBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - HistGBBenchmark._get_fidelity_choices(ntrees_choice='variable', subsample_choice='variable') - ) - return fidelity_space - - -__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, - HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark] +__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index 5b9d054b..0154e623 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -8,15 +8,15 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class LRBaseBenchmark(MLBenchmark): +class LRBenchmark(MLBenchmark): def __init__(self, task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): - super(LRBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) - self.cache_size = 500 # TODO: Do we need this? + super(LRBenchmark, self).__init__(task_id, rng, valid_size, data_path) + self.cache_size = 500 @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -34,7 +34,12 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp return cs def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - raise NotImplementedError() + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - iterations + data subsample + LRBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: @@ -87,49 +92,29 @@ def init_model(self, config: Union[CS.Configuration, Dict], learning_rate="adaptive", tol=None, random_state=rng, + ) return model -class LRSearchSpace0Benchmark(LRBaseBenchmark): +class LRBenchmarkBB(LRBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - LRBaseBenchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') + LRBenchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') ) return fidelity_space -class LRSearchSpace1Benchmark(LRBaseBenchmark): +class LRBenchmarkMF(LRBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - iterations - LRBaseBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') - ) - return fidelity_space - - -class LRSearchSpace2Benchmark(LRBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-fidelity) - data subsample - 
LRBaseBenchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='variable') - ) - return fidelity_space - - -class LRSearchSpace3Benchmark(LRBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-multi-fidelity) - iterations + data subsample - LRBaseBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + LRBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') ) return fidelity_space -__all__ = [LRSearchSpace0Benchmark, LRSearchSpace1Benchmark, - LRSearchSpace2Benchmark, LRSearchSpace3Benchmark] +__all__ = [LRBenchmark, LRBenchmarkBB, LRBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index 601efe23..ca3afa7c 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -9,13 +9,13 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class NNBaseBenchmark(MLBenchmark): +class NNBenchmark(MLBenchmark): def __init__(self, task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): - super(NNBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + super(NNBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -44,18 +44,13 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of epochs (max_iter) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities - """ - raise NotImplementedError() + + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - iterations + data subsample + NNBenchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: @@ -105,45 +100,24 @@ def init_model(self, config: Union[CS.Configuration, Dict], return model -class NNSearchSpace0Benchmark(NNBaseBenchmark): +class NNBenchmarkBB(NNBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - NNSearchSpace0Benchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') + NNBenchmarkBB._get_fidelity_choices(iter_choice='fixed', subsample_choice='fixed') ) return fidelity_space -class NNSearchSpace1Benchmark(NNBaseBenchmark): +class NNBenchmarkMF(NNBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # 
gray-box setting (multi-fidelity) - iterations - NNSearchSpace1Benchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') - ) - return fidelity_space - - -class NNSearchSpace2Benchmark(NNBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-fidelity) - subsample - NNSearchSpace2Benchmark._get_fidelity_choices(iter_choice='fixed', subsample_choice='variable') - ) - return fidelity_space - - -class NNSearchSpace3Benchmark(NNBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-multi-fidelity) - iterations + data subsample - NNSearchSpace3Benchmark._get_fidelity_choices(iter_choice='variable', subsample_choice='variable') + NNBenchmarkMF._get_fidelity_choices(iter_choice='variable', subsample_choice='fixed') ) return fidelity_space -__all__ = [NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, - NNSearchSpace2Benchmark, NNSearchSpace3Benchmark] +__all__ = [NNBenchmark, NNBenchmarkBB, NNBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index 838d956d..8b6a64d8 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -9,13 +9,13 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class RandomForestBaseBenchmark(MLBenchmark): +class RandomForestBenchmark(MLBenchmark): def __init__(self, task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): - super(RandomForestBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + super(RandomForestBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -41,18 +41,12 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities - """ - raise NotImplementedError() + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: @@ -100,45 +94,24 @@ def init_model(self, config: Union[CS.Configuration, Dict], return model -class RandomForestSearchSpace0Benchmark(RandomForestBaseBenchmark): +class RandomForestBenchmarkBB(RandomForestBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: 
fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') ) return fidelity_space -class RandomForestSearchSpace1Benchmark(RandomForestBaseBenchmark): +class RandomForestBenchmarkMF(RandomForestBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') - ) - return fidelity_space - - -class RandomForestSearchSpace2Benchmark(RandomForestBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-fidelity) - data subsample - RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='variable') - ) - return fidelity_space - - -class RandomForestSearchSpace3Benchmark(RandomForestBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - RandomForestBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') ) return fidelity_space -__all__ = [RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, - RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark] +__all__ = [RandomForestBenchmark, RandomForestBenchmarkBB, RandomForestBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index 0ae25e18..fa129cce 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -8,13 +8,13 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class SVMBaseBenchmark(MLBenchmark): +class SVMBenchmark(MLBenchmark): def __init__(self, task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): - super(SVMBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + super(SVMBenchmark, self).__init__(task_id, rng, valid_size, data_path) self.cache_size = 200 @@ -36,16 +36,11 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - For SVM, only a single fidelity exists, i.e., subsample fraction. 
- if fidelity_choice == 0 - uses the entire data (subsample=1), reflecting the black-box setup - else - parameterize the fraction of data to subsample - - """ - raise NotImplementedError() + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameter( + SVMBenchmark._get_fidelity_choices(subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: @@ -77,21 +72,15 @@ def init_model(self, config: Union[CS.Configuration, Dict], return model -class SVMSearchSpace0Benchmark(SVMBaseBenchmark): +class SVMBenchmarkBB(SVMBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameter( # uses the entire data (subsample=1), reflecting the black-box setup - SVMBaseBenchmark._get_fidelity_choices(subsample_choice='fixed') + SVMBenchmark._get_fidelity_choices(subsample_choice='fixed') ) return fidelity_space -class SVMSearchSpace1Benchmark(SVMBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameter( - # parameterize the fraction of data to subsample - SVMBaseBenchmark._get_fidelity_choices(subsample_choice='fixed') - ) - return fidelity_space +# To keep the parity of the the overall design +SVMBenchmarkMF = SVMBenchmark diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index a5735b2c..b22827de 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -8,13 +8,13 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -class XGBoostBaseBenchmark(MLBenchmark): +class XGBoostBenchmark(MLBenchmark): def __init__(self, task_id: int, rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, data_path: Union[str, None] = None): - super(XGBoostBaseBenchmark, self).__init__(task_id, rng, valid_size, data_path) + super(XGBoostBenchmark, self).__init__(task_id, rng, valid_size, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -40,18 +40,12 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities - """ - raise NotImplementedError() + fidelity_space = CS.ConfigurationSpace(seed=seed) + fidelity_space.add_hyperparameters( + # gray-box setting (multi-multi-fidelity) - ntrees + data subsample + XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + ) + return fidelity_space @staticmethod def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: @@ -107,45 +101,24 @@ def init_model(self, return model -class 
XGBoostSearchSpace0Benchmark(XGBoostBaseBenchmark): +class XGBoostBenchmarkBB(XGBoostBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') ) return fidelity_space -class XGBoostSearchSpace1Benchmark(XGBoostBaseBenchmark): +class XGBoostBenchmarkMF(XGBoostBenchmark): def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') - ) - return fidelity_space - - -class XGBoostSearchSpace2Benchmark(XGBoostBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-fidelity) - data subsample - XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='variable') - ) - return fidelity_space - - -class XGBoostSearchSpace3Benchmark(XGBoostBaseBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - fidelity_space = CS.ConfigurationSpace(seed=seed) - fidelity_space.add_hyperparameters( - # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - XGBoostBaseBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') ) return fidelity_space -__all__ = [XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, - XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark] +__all__ = [XGBoostBenchmarkBB, XGBoostBenchmarkMF, XGBoostBenchmark] diff --git a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py index 77ed4bbb..47886eb1 100644 --- a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py @@ -6,37 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class HistGBSearchSpace0Benchmark(AbstractBenchmarkClient): +class HistGBBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(HistGBSearchSpace0Benchmark, self).__init__(**kwargs) + super(HistGBBenchmark, self).__init__(**kwargs) -class HistGBSearchSpace1Benchmark(AbstractBenchmarkClient): +class HistGBBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmarkBB') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(HistGBSearchSpace1Benchmark, 
self).__init__(**kwargs) + super(HistGBBenchmarkBB, self).__init__(**kwargs) -class HistGBSearchSpace2Benchmark(AbstractBenchmarkClient): +class HistGBBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace2Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(HistGBSearchSpace2Benchmark, self).__init__(**kwargs) + super(HistGBBenchmarkMF, self).__init__(**kwargs) -class HistGBSearchSpace3Benchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'HistGBSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(HistGBSearchSpace3Benchmark, self).__init__(**kwargs) - - -__all__ = [HistGBSearchSpace0Benchmark, HistGBSearchSpace1Benchmark, - HistGBSearchSpace2Benchmark, HistGBSearchSpace3Benchmark] +__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF] diff --git a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py index fd1b4015..74092e71 100644 --- a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py @@ -6,37 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class LRSearchSpace0Benchmark(AbstractBenchmarkClient): +class LRBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(LRSearchSpace0Benchmark, self).__init__(**kwargs) + super(LRBenchmark, self).__init__(**kwargs) -class LRSearchSpace1Benchmark(AbstractBenchmarkClient): +class LRBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkBB') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(LRSearchSpace1Benchmark, self).__init__(**kwargs) + super(LRBenchmarkBB, self).__init__(**kwargs) -class LRSearchSpace2Benchmark(AbstractBenchmarkClient): +class LRBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace2Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(LRSearchSpace2Benchmark, self).__init__(**kwargs) + super(LRBenchmarkMF, self).__init__(**kwargs) -class LRSearchSpace3Benchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(LRSearchSpace3Benchmark, self).__init__(**kwargs) - - -__all__ = [LRSearchSpace0Benchmark, 
LRSearchSpace1Benchmark, - LRSearchSpace2Benchmark, LRSearchSpace3Benchmark] +__all__ = [LRBenchmark, LRBenchmarkBB, LRBenchmarkMF] diff --git a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py index 818fb606..8a444c11 100644 --- a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py @@ -6,37 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class NNSearchSpace0Benchmark(AbstractBenchmarkClient): +class NNBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(NNSearchSpace0Benchmark, self).__init__(**kwargs) + super(NNBenchmark, self).__init__(**kwargs) -class NNSearchSpace1Benchmark(AbstractBenchmarkClient): +class NNBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkBB') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(NNSearchSpace1Benchmark, self).__init__(**kwargs) + super(NNBenchmarkBB, self).__init__(**kwargs) -class NNSearchSpace2Benchmark(AbstractBenchmarkClient): +class NNBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace2Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(NNSearchSpace2Benchmark, self).__init__(**kwargs) + super(NNBenchmarkMF, self).__init__(**kwargs) -class NNSearchSpace3Benchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(NNSearchSpace3Benchmark, self).__init__(**kwargs) - - -__all__ = [NNSearchSpace0Benchmark, NNSearchSpace1Benchmark, - NNSearchSpace2Benchmark, NNSearchSpace3Benchmark] +__all__ = [NNBenchmark, NNBenchmarkBB, NNBenchmarkMF] diff --git a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py index 3c7ced83..4f59f6a0 100644 --- a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py @@ -6,37 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class RandomForestSearchSpace0Benchmark(AbstractBenchmarkClient): +class RandomForestBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(RandomForestSearchSpace0Benchmark, self).__init__(**kwargs) + super(RandomForestBenchmark, self).__init__(**kwargs) 
-class RandomForestSearchSpace1Benchmark(AbstractBenchmarkClient): +class RandomForestBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkBB') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(RandomForestSearchSpace1Benchmark, self).__init__(**kwargs) + super(RandomForestBenchmarkBB, self).__init__(**kwargs) -class RandomForestSearchSpace2Benchmark(AbstractBenchmarkClient): +class RandomForestBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace2Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(RandomForestSearchSpace2Benchmark, self).__init__(**kwargs) + super(RandomForestBenchmarkMF, self).__init__(**kwargs) -class RandomForestSearchSpace3Benchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(RandomForestSearchSpace3Benchmark, self).__init__(**kwargs) - - -__all__ = [RandomForestSearchSpace0Benchmark, RandomForestSearchSpace1Benchmark, - RandomForestSearchSpace2Benchmark, RandomForestSearchSpace3Benchmark] +__all__ = [RandomForestBenchmark, RandomForestBenchmarkBB, RandomForestBenchmarkMF] diff --git a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py index b2c46e75..328b26f3 100644 --- a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py @@ -6,20 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class SVMSearchSpace0Benchmark(AbstractBenchmarkClient): +class SVMBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(SVMSearchSpace0Benchmark, self).__init__(**kwargs) + super(SVMBenchmark, self).__init__(**kwargs) -class SVMSearchSpace1Benchmark(AbstractBenchmarkClient): +class SVMBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(SVMSearchSpace1Benchmark, self).__init__(**kwargs) + super(SVMBenchmarkMF, self).__init__(**kwargs) -__all__ = [SVMSearchSpace0Benchmark, SVMSearchSpace1Benchmark] +class SVMBenchmarkBB(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkBB') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = 
kwargs.get('container_tag', '0.0.1') + super(SVMBenchmarkBB, self).__init__(**kwargs) + + +__all__ = [SVMBenchmark, SVMBenchmarkMF, SVMBenchmarkBB] diff --git a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py index 72438d37..1f09ead9 100644 --- a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -6,28 +6,28 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class XGBoostSearchSpace0Benchmark(AbstractBenchmarkClient): +class XGBoostBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace0Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostSearchSpace0Benchmark, self).__init__(**kwargs) + super(XGBoostBenchmark, self).__init__(**kwargs) -class XGBoostSearchSpace1Benchmark(AbstractBenchmarkClient): +class XGBoostBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace1Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkBB') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostSearchSpace1Benchmark, self).__init__(**kwargs) + super(XGBoostBenchmarkBB, self).__init__(**kwargs) -class XGBoostSearchSpace2Benchmark(AbstractBenchmarkClient): +class XGBoostBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace2Benchmark') + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkMF') kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostSearchSpace2Benchmark, self).__init__(**kwargs) + super(XGBoostBenchmarkMF, self).__init__(**kwargs) class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): @@ -38,5 +38,4 @@ def __init__(self, **kwargs): super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) -__all__ = [XGBoostSearchSpace0Benchmark, XGBoostSearchSpace1Benchmark, - XGBoostSearchSpace2Benchmark, XGBoostSearchSpace3Benchmark] +__all__ = [XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] \ No newline at end of file From b8b30a535448c87f70b2931f41950f7dd0662977 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:41:33 +0200 Subject: [PATCH 64/95] PR Requests: Move dependencies to correct directory --- hpobench/benchmarks/ml_mmfb/histgb_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/tabular_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py | 4 ++-- hpobench/dependencies/{ml => ml_mmfb}/__init__.py | 0 hpobench/dependencies/{ml => ml_mmfb}/data_manager.py | 0 .../dependencies/{ml => ml_mmfb}/ml_benchmark_template.py | 2 +- 10 files changed, 9 insertions(+), 9 deletions(-) rename hpobench/dependencies/{ml => ml_mmfb}/__init__.py (100%) rename hpobench/dependencies/{ml => ml_mmfb}/data_manager.py (100%) rename 
hpobench/dependencies/{ml => ml_mmfb}/ml_benchmark_template.py (99%) diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 5d164503..7a697129 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -7,7 +7,7 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class HistGBBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index 0154e623..a8ef771f 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -5,7 +5,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.linear_model import SGDClassifier -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class LRBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index ca3afa7c..c8341e8a 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.neural_network import MLPClassifier -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class NNBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index 8b6a64d8..781c7ed4 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.ensemble import RandomForestClassifier -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class RandomForestBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index fa129cce..6dccf605 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -5,7 +5,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.svm import SVC -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class SVMBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index 907b5e51..f572d0dc 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.read_and_write import json as json_cs from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.dependencies.ml.ml_benchmark_template import metrics +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import metrics from hpobench.util.data_manager import TabularDataManager diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index b22827de..57218598 100644 --- 
a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -5,7 +5,7 @@ import xgboost as xgb from ConfigSpace.hyperparameters import Hyperparameter -from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark class XGBoostBenchmark(MLBenchmark): @@ -54,7 +54,7 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu assert subsample_choice in ['fixed', 'variable'] fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=100), # TODO: Should this be 1000 or 100? + fixed=CS.Constant('n_estimators', value=1000), variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=50, upper=2000, default_value=1000, log=False ) diff --git a/hpobench/dependencies/ml/__init__.py b/hpobench/dependencies/ml_mmfb/__init__.py similarity index 100% rename from hpobench/dependencies/ml/__init__.py rename to hpobench/dependencies/ml_mmfb/__init__.py diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml_mmfb/data_manager.py similarity index 100% rename from hpobench/dependencies/ml/data_manager.py rename to hpobench/dependencies/ml_mmfb/data_manager.py diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py similarity index 99% rename from hpobench/dependencies/ml/ml_benchmark_template.py rename to hpobench/dependencies/ml_mmfb/ml_benchmark_template.py index 8460b113..b67078e7 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py @@ -9,7 +9,7 @@ precision_score, f1_score from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.dependencies.ml.data_manager import OpenMLDataManager +from hpobench.dependencies.ml_mmfb.data_manager import OpenMLDataManager from hpobench.util.rng_helper import get_rng metrics = dict( From 875c594924fe6059500bec970076e61e1ddebefb Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:42:16 +0200 Subject: [PATCH 65/95] PR Requests: Tabular Benchmarks - Remove unnecessary class definition --- .../benchmarks/ml_mmfb/tabular_benchmark.py | 68 ++++++------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index f572d0dc..2f346624 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -10,7 +10,7 @@ from hpobench.util.data_manager import TabularDataManager -class BaseTabularBenchmark(AbstractBenchmark): +class TabularBenchmark(AbstractBenchmark): def __init__(self, model: str, task_id: int, @@ -29,7 +29,10 @@ def __init__(self, self.config_spaces = self.metadata["config_spaces"] self.global_minimums = self.metadata["global_min"] - super(BaseTabularBenchmark, self).__init__(rng=rng, **kwargs) + self.original_cs = json_cs.read(self.config_spaces['x']) + self.original_fs = json_cs.read(self.config_spaces['z']) + + super(TabularBenchmark, self).__init__(rng=rng, **kwargs) # pylint: disable=arguments-differ @AbstractBenchmark.check_parameters @@ -59,19 +62,26 @@ def objective_function_test(self, # pylint: disable=arguments-differ def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - raise NotImplementedError + cs = json_cs.read(self.config_spaces['x_discrete']) + cs = self._preprocess_configspace(cs) + cs.seed(seed) + 
return cs # pylint: disable=arguments-differ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - raise NotImplementedError + cs = json_cs.read(self.config_spaces['z_discrete']) + cs.seed(seed=seed) + return cs # pylint: disable=arguments-differ def get_meta_information(self) -> Dict: """ Returns the meta information for the benchmark """ - return {'name': 'BaseTabularBenchmark', + return {'name': 'TabularBenchmark', 'references': [], 'task_id': self.task_id, - 'model': self.model + 'model': self.model, + 'original_configuration_space': self.original_cs, + 'original_fidelity_space': self.original_fs, } def _preprocess_configspace(self, config_space: CS.ConfigurationSpace) -> CS.ConfigurationSpace: @@ -93,7 +103,7 @@ def _total_number_of_configurations(self, space: str = "hyperparameters") -> int def _seeds_used(self) -> List: return self.table.seed.unique().tolist() - def sample_hyperparamer(self, n: int = 1) -> Union[CS.Configuration, List]: + def sample_hyperparameter(self, n: int = 1) -> Union[CS.Configuration, List]: return self.configuration_space.sample_configuration(n) def sample_fidelity(self, n: int = 1) -> Union[CS.Configuration, List]: @@ -163,10 +173,9 @@ def _objective( cost_key = f"{evaluation}_scores" key_path = dict() - # TODO: Dicts are unordered. This does not have to have an effect. - for name in np.sort(self.configuration_space.get_hyperparameter_names()): + for name in self.configuration_space.get_hyperparameter_names(): key_path[str(name)] = config[str(name)] - for name in np.sort(self.fidelity_space.get_hyperparameter_names()): + for name in self.fidelity_space.get_hyperparameter_names(): key_path[str(name)] = fidelity[str(name)] if seed is not None: @@ -190,41 +199,4 @@ def _objective( return result -class TabularBenchmark(BaseTabularBenchmark): - def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, - rng: Union[int, np.random.RandomState, None] = None, **kwargs): - super(TabularBenchmark, self).__init__(model, task_id, data_dir, rng, **kwargs) - - # pylint: disable=arguments-differ - def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = json_cs.read(self.config_spaces['x_discrete']) - cs = self._preprocess_configspace(cs) - cs.seed(seed) - return cs - - # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = json_cs.read(self.config_spaces['z_discrete']) - cs.seed(seed=seed) - return cs - - -class OriginalTabularBenchmark(BaseTabularBenchmark): - def __init__(self, model: str, task_id: int, data_dir: Union[Path, str, None] = None, - rng: Union[int, np.random.RandomState, None] = None, **kwargs): - super(OriginalTabularBenchmark, self).__init__(model, task_id, data_dir, rng, **kwargs) - - # pylint: disable=arguments-differ - def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = json_cs.read(self.config_spaces['x']) - cs.seed(seed) - return cs - - # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = json_cs.read(self.config_spaces['z']) - cs.seed(seed=seed) - return cs - - -__all__ = [TabularBenchmark, OriginalTabularBenchmark] +__all__ = [TabularBenchmark] From 8891e33a5312a1c07d3ae5a9f57c5f5a874798c6 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:42:34 +0200 Subject: [PATCH 66/95] PR Requests: Minor improvments --- 
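With the subclasses removed above, the single TabularBenchmark now reads its discrete search and fidelity spaces from the stored metadata ('x_discrete' and 'z_discrete'), while the original continuous spaces are only reported through get_meta_information(). A minimal lookup sketch under assumptions: the model name, task id and seed below are placeholders, and the precomputed table is assumed to be available locally:

    from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark

    tab = TabularBenchmark(model='rf', task_id=31)   # 'rf' and 31 are illustrative
    config = tab.get_configuration_space(seed=1).sample_configuration()
    fidelity = tab.get_fidelity_space(seed=1).sample_configuration()
    # The lookup happens inside _objective(); only configuration/fidelity values
    # that exist in the precomputed grid can be queried
    result = tab.objective_function(configuration=config, fidelity=fidelity)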
hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index 781c7ed4..f0e70086 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -55,7 +55,7 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu assert subsample_choice in ['fixed', 'variable'] fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=100), # TODO: is the default value here 100 or 512? + fixed=CS.Constant('n_estimators', value=512), variable=CS.UniformIntegerHyperparameter( 'n_estimators', lower=16, upper=512, default_value=512, log=False ) diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index 57218598..5a997241 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -81,8 +81,7 @@ def init_model(self, if isinstance(fidelity, CS.Configuration): fidelity = fidelity.get_dictionary() - # TODO: This seems to be wrong. (AND-condition) - rng = rng if (rng is None and isinstance(rng, int)) else self.seed + rng = rng if (rng is None or isinstance(rng, int)) else self.seed extra_args = dict( booster="gbtree", n_estimators=fidelity['n_estimators'], From 75f345dac76b0de77578797189e512d9283243c5 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:44:34 +0200 Subject: [PATCH 67/95] PR Requests: Update upper bounds of the fidelities --- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index c8341e8a..7da663bf 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -56,7 +56,7 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: fidelity1 = dict( - fixed=CS.Constant('iter', value=100), + fixed=CS.Constant('iter', value=243), variable=CS.UniformIntegerHyperparameter( 'iter', lower=3, upper=243, default_value=243, log=False ) diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index 5a997241..17e7c165 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -54,9 +54,9 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu assert subsample_choice in ['fixed', 'variable'] fidelity1 = dict( - fixed=CS.Constant('n_estimators', value=1000), + fixed=CS.Constant('n_estimators', value=2000), variable=CS.UniformIntegerHyperparameter( - 'n_estimators', lower=50, upper=2000, default_value=1000, log=False + 'n_estimators', lower=50, upper=2000, default_value=2000, log=False ) ) fidelity2 = dict( From 8c2ab6cf130d98aad629336cae3bb2b2640300e0 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 17:51:01 +0200 Subject: [PATCH 68/95] PR Requests: Remove OriginalTabBenchmarks --- .../container/benchmarks/ml_mmfb/tabular_benchmark.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py 
b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py index f4a855d5..54b2763f 100644 --- a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py @@ -14,12 +14,4 @@ def __init__(self, **kwargs): super(TabularBenchmark, self).__init__(**kwargs) -class OriginalTabularBenchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'OriginalTabularBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(OriginalTabularBenchmark, self).__init__(**kwargs) - - -__all__ = [TabularBenchmark, OriginalTabularBenchmark] +__all__ = [TabularBenchmark] From e24d53736a5b26af4258e8379f528bef2ba61338 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 18:06:02 +0200 Subject: [PATCH 69/95] PR Requests: Revert the query function --- .../benchmarks/ml_mmfb/tabular_benchmark.py | 36 +++++-------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index 2f346624..e2b16645 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -130,33 +130,15 @@ def get_fidelity_range(self) -> List: return fidelities def _search_dataframe(self, row_dict, df): - query_stmt = self._build_query(row_dict) - result = df.query(query_stmt) - # TODO: What happens in this case? The objective function raises a TypeError. - if len(result) == 0: - return None - return result.iloc[0].loc['result'] - - # TODO: This created an out-of-bounds error. The idx mask should have been 2d, but was 1d. - # # https://stackoverflow.com/a/46165056/8363967 - # mask = np.array([True] * df.shape[0]) - # for i, param in enumerate(df.drop("result", axis=1).columns): - # mask *= df[param].values == row_dict[param] - # idx = np.where(mask) - # if len(idx) != 1: - # return None - # idx = idx[0][0] - # result = df.iloc[idx]["result"] - # return result - - @staticmethod - def _build_query(row_dict: Dict) -> str: - query = '' - for i, (param_name, param_value) in enumerate(row_dict.items()): - if i != 0: - query += ' & ' - query += f'{param_name} == {param_value}' - return query + # https://stackoverflow.com/a/46165056/8363967 + mask = np.array([True] * df.shape[0]) + for i, param in enumerate(df.drop("result", axis=1).columns): + mask *= df[param].values == row_dict[param] + idx = np.where(mask) + assert len(idx) == 1, f'The query has resulted into mulitple matches. This should not happen.' 
+ idx = idx[0][0] + result = df.iloc[idx]["result"] + return result def _objective( self, From 3c4f37582bccee2ba6ecd549f6c3ea201655ab73 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 18:09:01 +0200 Subject: [PATCH 70/95] PR Requests: Minor improvements --- hpobench/benchmarks/ml_mmfb/__init__.py | 17 +++++++++++++++++ hpobench/benchmarks/ml_mmfb/entry_point.py | 17 ----------------- hpobench/benchmarks/ml_mmfb/histgb_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/lr_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/nn_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/rf_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 2 ++ .../benchmarks/ml_mmfb/tabular_benchmark.py | 2 +- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 2 +- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 2 +- 10 files changed, 26 insertions(+), 24 deletions(-) delete mode 100644 hpobench/benchmarks/ml_mmfb/entry_point.py diff --git a/hpobench/benchmarks/ml_mmfb/__init__.py b/hpobench/benchmarks/ml_mmfb/__init__.py index e69de29b..0d13c728 100644 --- a/hpobench/benchmarks/ml_mmfb/__init__.py +++ b/hpobench/benchmarks/ml_mmfb/__init__.py @@ -0,0 +1,17 @@ +from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ + RandomForestBenchmarkMF +from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF +from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark +from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF + + +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/benchmarks/ml_mmfb/entry_point.py b/hpobench/benchmarks/ml_mmfb/entry_point.py deleted file mode 100644 index 0114acaa..00000000 --- a/hpobench/benchmarks/ml_mmfb/entry_point.py +++ /dev/null @@ -1,17 +0,0 @@ -from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF -from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF -from hpobench.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF -from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ - RandomForestBenchmarkMF -from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF -from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark, OriginalTabularBenchmark -from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF - - -__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF, - LRBenchmark, LRBenchmarkBB, LRBenchmarkMF, - NNBenchmark, NNBenchmarkBB, NNBenchmarkMF, - RandomForestBenchmark, RandomForestBenchmarkBB, RandomForestBenchmarkMF, - SVMBenchmark, SVMBenchmarkBB, 
SVMBenchmarkMF, - TabularBenchmark, OriginalTabularBenchmark, - XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py index 7a697129..4947d022 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py @@ -112,4 +112,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF] +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF'] diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py index a8ef771f..32a21be9 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/lr_benchmark.py @@ -117,4 +117,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -__all__ = [LRBenchmark, LRBenchmarkBB, LRBenchmarkMF] +__all__ = ['LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF'] diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py index 7da663bf..8179731c 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/nn_benchmark.py @@ -120,4 +120,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -__all__ = [NNBenchmark, NNBenchmarkBB, NNBenchmarkMF] +__all__ = ['NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF'] diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py index f0e70086..788ee64f 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/rf_benchmark.py @@ -114,4 +114,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -__all__ = [RandomForestBenchmark, RandomForestBenchmarkBB, RandomForestBenchmarkMF] +__all__ = ['RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF'] diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index 6dccf605..b3bf7568 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -84,3 +84,5 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS # To keep the parity of the the overall design SVMBenchmarkMF = SVMBenchmark + +__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] \ No newline at end of file diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index e2b16645..bf43bf1b 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -181,4 +181,4 @@ def _objective( return result -__all__ = [TabularBenchmark] +__all__ = ['TabularBenchmark'] diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py index 17e7c165..d975857e 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -120,4 +120,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space -__all__ = [XGBoostBenchmarkBB, XGBoostBenchmarkMF, XGBoostBenchmark] +__all__ = ['XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', 'XGBoostBenchmark'] diff --git 
a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py index 1f09ead9..547ce945 100644 --- a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -38,4 +38,4 @@ def __init__(self, **kwargs): super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) -__all__ = [XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] \ No newline at end of file +__all__ = [XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] From 6fc7f576a6b9f935f40c9486620eb9649e2fb7af Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 18:10:22 +0200 Subject: [PATCH 71/95] Pycodestyle --- hpobench/benchmarks/ml_mmfb/svm_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/tabular_benchmark.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py index b3bf7568..b0bd7f65 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/svm_benchmark.py @@ -85,4 +85,4 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS # To keep the parity of the the overall design SVMBenchmarkMF = SVMBenchmark -__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] \ No newline at end of file +__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index bf43bf1b..9e6ec026 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -135,7 +135,8 @@ def _search_dataframe(self, row_dict, df): for i, param in enumerate(df.drop("result", axis=1).columns): mask *= df[param].values == row_dict[param] idx = np.where(mask) - assert len(idx) == 1, f'The query has resulted into mulitple matches. This should not happen.' + assert len(idx) == 1, 'The query has resulted into mulitple matches. This should not happen. 
' \ + f'The Query was {row_dict}' idx = idx[0][0] result = df.iloc[idx]["result"] return result From 0430c68c4dd2560af01444704bdc46bb214afd19 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 19:48:48 +0200 Subject: [PATCH 72/95] Add missing requirements --- extra_requirements/ml_mfbb.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extra_requirements/ml_mfbb.json b/extra_requirements/ml_mfbb.json index 68b4a557..db67ecd0 100644 --- a/extra_requirements/ml_mfbb.json +++ b/extra_requirements/ml_mfbb.json @@ -1,4 +1,4 @@ { - "ml_tabular_benchmarks": ["pandas==1.2.4"], - "ml_mfbb": ["pandas==1.2.4","sklearn==0.24.2"] + "ml_tabular_benchmarks": ["pandas==1.2.4","openml==0.12.2"], + "ml_mfbb": ["pandas==1.2.4","scikit-learn==0.24.2","openml==0.12.2","xgboost==1.3.1"] } \ No newline at end of file From 3eb3a2d587778e5c06d030dd3d84eea4683baeea Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 19:49:35 +0200 Subject: [PATCH 73/95] Minor Improvements - cast return values to float - improve the __all__ vars --- .../container/benchmarks/ml_mmfb/__init__.py | 19 +++++++++++++++++++ .../benchmarks/ml_mmfb/histgb_benchmark.py | 2 +- .../benchmarks/ml_mmfb/lr_benchmark.py | 2 +- .../benchmarks/ml_mmfb/nn_benchmark.py | 2 +- .../benchmarks/ml_mmfb/rf_benchmark.py | 2 +- .../benchmarks/ml_mmfb/svm_benchmark.py | 2 +- .../benchmarks/ml_mmfb/tabular_benchmark.py | 2 +- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 2 +- hpobench/dependencies/ml_mmfb/data_manager.py | 2 -- .../ml_mmfb/ml_benchmark_template.py | 4 ++-- 10 files changed, 28 insertions(+), 11 deletions(-) diff --git a/hpobench/container/benchmarks/ml_mmfb/__init__.py b/hpobench/container/benchmarks/ml_mmfb/__init__.py index e69de29b..5f5cada5 100644 --- a/hpobench/container/benchmarks/ml_mmfb/__init__.py +++ b/hpobench/container/benchmarks/ml_mmfb/__init__.py @@ -0,0 +1,19 @@ +from hpobench.container.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmarkMF, HistGBBenchmarkBB, HistGBBenchmark +from hpobench.container.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.container.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.container.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.container.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ + RandomForestBenchmarkMF +from hpobench.container.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF +from hpobench.container.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark +from hpobench.container.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, \ + XGBoostBenchmarkMF + + +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py index 47886eb1..dc7af088 100644 --- a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py @@ -30,4 +30,4 @@ def 
__init__(self, **kwargs): super(HistGBBenchmarkMF, self).__init__(**kwargs) -__all__ = [HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF] +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py index 74092e71..979cda3e 100644 --- a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py @@ -30,4 +30,4 @@ def __init__(self, **kwargs): super(LRBenchmarkMF, self).__init__(**kwargs) -__all__ = [LRBenchmark, LRBenchmarkBB, LRBenchmarkMF] +__all__ = ['LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py index 8a444c11..04955e82 100644 --- a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py @@ -30,4 +30,4 @@ def __init__(self, **kwargs): super(NNBenchmarkMF, self).__init__(**kwargs) -__all__ = [NNBenchmark, NNBenchmarkBB, NNBenchmarkMF] +__all__ = ['NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py index 4f59f6a0..a414349d 100644 --- a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py @@ -30,4 +30,4 @@ def __init__(self, **kwargs): super(RandomForestBenchmarkMF, self).__init__(**kwargs) -__all__ = [RandomForestBenchmark, RandomForestBenchmarkBB, RandomForestBenchmarkMF] +__all__ = ['RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py index 328b26f3..7547a81a 100644 --- a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py @@ -30,4 +30,4 @@ def __init__(self, **kwargs): super(SVMBenchmarkBB, self).__init__(**kwargs) -__all__ = [SVMBenchmark, SVMBenchmarkMF, SVMBenchmarkBB] +__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] diff --git a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py index 54b2763f..6d19953b 100644 --- a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py @@ -14,4 +14,4 @@ def __init__(self, **kwargs): super(TabularBenchmark, self).__init__(**kwargs) -__all__ = [TabularBenchmark] +__all__ = ['TabularBenchmark'] diff --git a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py index 547ce945..c82ea606 100644 --- a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py @@ -38,4 +38,4 @@ def __init__(self, **kwargs): super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) -__all__ = [XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF] +__all__ = ['XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/dependencies/ml_mmfb/data_manager.py b/hpobench/dependencies/ml_mmfb/data_manager.py index 84d8b587..526c6756 100644 --- a/hpobench/dependencies/ml_mmfb/data_manager.py +++ b/hpobench/dependencies/ml_mmfb/data_manager.py @@ -14,8 +14,6 @@ from oslo_concurrency import lockutils from 
hpobench.util.data_manager import DataManager - - from hpobench import config_file diff --git a/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py b/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py index b67078e7..3af13965 100644 --- a/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py +++ b/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py @@ -286,7 +286,7 @@ def objective_function_test(self, } return { - 'function_value': info['test_loss'], - 'cost': model_fit_time + info['test_costs']['acc'], + 'function_value': float(info['test_loss']), + 'cost': float(model_fit_time + info['test_costs']['acc']), 'info': info } From fa691f7b72b80605f214fcd0834ffd0350761f80 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Tue, 17 Aug 2021 19:52:31 +0200 Subject: [PATCH 74/95] ADD container recipes --- .../recipes/ml_mmfb/Singularity.ml_mmfb | 25 +++++++++++++++++++ .../ml_mmfb/Singularity.ml_tabular_benchmark | 25 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb create mode 100644 hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark diff --git a/hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb b/hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb new file mode 100644 index 00000000..49f9a894 --- /dev/null +++ b/hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb @@ -0,0 +1,25 @@ +Bootstrap: docker +From: python:3.8-slim + +%labels +MAINTAINER muelleph@cs.uni-freiburg.de +VERSION v0.0.1 + +%post + apt update -y \ + && apt install build-essential git -y \ + + cd /home \ + && git clone https://github.com/automl/HPOBench.git \ + && cd HPOBench \ + && git checkout development \ + && pip install ".[ml_mfbb]" \ + && cd / \ + && mkdir /var/lib/hpobench/ \ + && chmod -R 777 /var/lib/hpobench/ \ + && pip cache purge \ + && rm -rf /var/lib/apt/lists/* + + +%runscript + python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml_mmfb $@ diff --git a/hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark b/hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark new file mode 100644 index 00000000..d128211a --- /dev/null +++ b/hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark @@ -0,0 +1,25 @@ +Bootstrap: docker +From: python:3.8-slim + +%labels +MAINTAINER muelleph@cs.uni-freiburg.de +VERSION v0.0.1 + +%post + apt update -y \ + && apt install build-essential git -y \ + + cd /home \ + && git clone https://github.com/automl/HPOBench.git \ + && cd HPOBench \ + && git checkout development \ + && pip install ".[ml_tabular_benchmarks]" \ + && cd / \ + && mkdir /var/lib/hpobench/ \ + && chmod -R 777 /var/lib/hpobench/ \ + && pip cache purge \ + && rm -rf /var/lib/apt/lists/* + + +%runscript + python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml_mmfb $@ From f64917e7c52f3ea22dcfbe4f2582da991dcb8edd Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 14:44:29 +0200 Subject: [PATCH 75/95] PR: Fix path in tabular data loader --- .gitignore | 1 + hpobench/util/data_manager.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 37101fcb..5e77c268 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,4 @@ experiments/ # Vagrant .vagrant Vagrantfile +/hpobench/container/recipes_local/ diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index d390218e..780bb06c 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ 
-942,9 +942,9 @@ def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] self.url_to_use = url_dict.get(model) if data_dir is None: - data_dir = hpobench.config_file.data_dir + data_dir = hpobench.config_file.data_dir / "TabularData" - self._save_dir = Path(data_dir) / "TabularData" / self.model + self._save_dir = Path(data_dir) / self.model self.create_save_directory(self._save_dir) self.parquet_file = self._save_dir / self.task_id / f'{self.model}_{self.task_id}_data.parquet.gzip' From b95d2a5b703185ad2077eaef4040113828757829 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 14:44:48 +0200 Subject: [PATCH 76/95] PR: Remove casting configspace to np.floats --- hpobench/benchmarks/ml_mmfb/tabular_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py index 9e6ec026..50af2457 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py @@ -63,7 +63,7 @@ def objective_function_test(self, # pylint: disable=arguments-differ def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: cs = json_cs.read(self.config_spaces['x_discrete']) - cs = self._preprocess_configspace(cs) + # cs = self._preprocess_configspace(cs) cs.seed(seed) return cs @@ -178,7 +178,7 @@ def _objective( info[seed] = res["info"] key_path.pop("seed") loss = np.mean(loss) - result = dict(function_value=loss, cost=costs, info=info) + result = dict(function_value=float(loss), cost=costs, info=info) return result From d7d7a2d49b73a1656dc93639ad972c296669d80e Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 14:54:23 +0200 Subject: [PATCH 77/95] PR: Move everything back from ml_mmfb/ to ml/ --- hpobench/benchmarks/{ml_mmfb => ml}/README.md | 0 hpobench/benchmarks/ml/__init__.py | 21 ++++++++++ .../{ml_mmfb => ml}/histgb_benchmark.py | 2 +- .../{ml_mmfb => ml}/lr_benchmark.py | 2 +- .../{ml_mmfb => ml}/nn_benchmark.py | 2 +- .../{ml_mmfb => ml}/rf_benchmark.py | 2 +- .../{ml_mmfb => ml}/svm_benchmark.py | 2 +- .../{ml_mmfb => ml}/tabular_benchmark.py | 2 +- .../{ml_mmfb => ml}/xgboost_benchmark.py | 2 +- hpobench/benchmarks/ml_mmfb/__init__.py | 14 +++---- hpobench/container/benchmarks/ml/__init__.py | 17 ++++++++ .../{ml_mmfb => ml}/histgb_benchmark.py | 0 .../{ml_mmfb => ml}/lr_benchmark.py | 0 .../{ml_mmfb => ml}/nn_benchmark.py | 0 .../{ml_mmfb => ml}/rf_benchmark.py | 0 .../container/benchmarks/ml/svm_benchmark.py | 34 +++++++++++---- .../benchmarks/ml/svm_benchmark_old.py | 15 +++++++ .../{ml_mmfb => ml}/tabular_benchmark.py | 0 .../benchmarks/ml/xgboost_benchmark.py | 41 +++++++++++++------ .../benchmarks/ml/xgboost_benchmark_old.py | 24 +++++++++++ .../container/benchmarks/ml_mmfb/__init__.py | 19 --------- .../benchmarks/ml_mmfb/svm_benchmark.py | 33 --------------- .../benchmarks/ml_mmfb/xgboost_benchmark.py | 41 ------------------- .../{ml_mmfb => ml}/Singularity.ml_mmfb | 2 +- .../Singularity.ml_tabular_benchmark | 2 +- .../dependencies/{ml_mmfb => ml}/__init__.py | 0 .../{ml_mmfb => ml}/data_manager.py | 0 .../{ml_mmfb => ml}/ml_benchmark_template.py | 2 +- 28 files changed, 149 insertions(+), 130 deletions(-) rename hpobench/benchmarks/{ml_mmfb => ml}/README.md (100%) rename hpobench/benchmarks/{ml_mmfb => ml}/histgb_benchmark.py (98%) rename hpobench/benchmarks/{ml_mmfb => ml}/lr_benchmark.py (98%) rename hpobench/benchmarks/{ml_mmfb => 
ml}/nn_benchmark.py (98%) rename hpobench/benchmarks/{ml_mmfb => ml}/rf_benchmark.py (98%) rename hpobench/benchmarks/{ml_mmfb => ml}/svm_benchmark.py (97%) rename hpobench/benchmarks/{ml_mmfb => ml}/tabular_benchmark.py (99%) rename hpobench/benchmarks/{ml_mmfb => ml}/xgboost_benchmark.py (98%) rename hpobench/container/benchmarks/{ml_mmfb => ml}/histgb_benchmark.py (100%) rename hpobench/container/benchmarks/{ml_mmfb => ml}/lr_benchmark.py (100%) rename hpobench/container/benchmarks/{ml_mmfb => ml}/nn_benchmark.py (100%) rename hpobench/container/benchmarks/{ml_mmfb => ml}/rf_benchmark.py (100%) create mode 100644 hpobench/container/benchmarks/ml/svm_benchmark_old.py rename hpobench/container/benchmarks/{ml_mmfb => ml}/tabular_benchmark.py (100%) create mode 100644 hpobench/container/benchmarks/ml/xgboost_benchmark_old.py delete mode 100644 hpobench/container/benchmarks/ml_mmfb/__init__.py delete mode 100644 hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py delete mode 100644 hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py rename hpobench/container/recipes/{ml_mmfb => ml}/Singularity.ml_mmfb (95%) rename hpobench/container/recipes/{ml_mmfb => ml}/Singularity.ml_tabular_benchmark (96%) rename hpobench/dependencies/{ml_mmfb => ml}/__init__.py (100%) rename hpobench/dependencies/{ml_mmfb => ml}/data_manager.py (100%) rename hpobench/dependencies/{ml_mmfb => ml}/ml_benchmark_template.py (99%) diff --git a/hpobench/benchmarks/ml_mmfb/README.md b/hpobench/benchmarks/ml/README.md similarity index 100% rename from hpobench/benchmarks/ml_mmfb/README.md rename to hpobench/benchmarks/ml/README.md diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index e69de29b..966d2ed8 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -0,0 +1,21 @@ +from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ + RandomForestBenchmarkMF +from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF +from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark +from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF +from hpobench.benchmarks.ml.pybnn import BNNOnToyFunction, BNNOnProteinStructure, BNNOnProteinStructure, \ + BNNOnYearPrediction + + +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', + 'BNNOnToyFunction', 'BNNOnProteinStructure', 'BNNOnProteinStructure', 'BNNOnYearPrediction' + ] diff --git a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/benchmarks/ml/histgb_benchmark.py similarity index 98% rename from hpobench/benchmarks/ml_mmfb/histgb_benchmark.py rename to hpobench/benchmarks/ml/histgb_benchmark.py index 4947d022..f08882e8 100644 --- a/hpobench/benchmarks/ml_mmfb/histgb_benchmark.py +++ b/hpobench/benchmarks/ml/histgb_benchmark.py @@ -7,7 
+7,7 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingClassifier -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class HistGBBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py similarity index 98% rename from hpobench/benchmarks/ml_mmfb/lr_benchmark.py rename to hpobench/benchmarks/ml/lr_benchmark.py index 32a21be9..e99170d0 100644 --- a/hpobench/benchmarks/ml_mmfb/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -5,7 +5,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.linear_model import SGDClassifier -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class LRBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py similarity index 98% rename from hpobench/benchmarks/ml_mmfb/nn_benchmark.py rename to hpobench/benchmarks/ml/nn_benchmark.py index 8179731c..5c3d54fb 100644 --- a/hpobench/benchmarks/ml_mmfb/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.neural_network import MLPClassifier -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class NNBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py similarity index 98% rename from hpobench/benchmarks/ml_mmfb/rf_benchmark.py rename to hpobench/benchmarks/ml/rf_benchmark.py index 788ee64f..b6424a6e 100644 --- a/hpobench/benchmarks/ml_mmfb/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.ensemble import RandomForestClassifier -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class RandomForestBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py similarity index 97% rename from hpobench/benchmarks/ml_mmfb/svm_benchmark.py rename to hpobench/benchmarks/ml/svm_benchmark.py index b0bd7f65..582a3342 100644 --- a/hpobench/benchmarks/ml_mmfb/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -5,7 +5,7 @@ from ConfigSpace.hyperparameters import Hyperparameter from sklearn.svm import SVC -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class SVMBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py similarity index 99% rename from hpobench/benchmarks/ml_mmfb/tabular_benchmark.py rename to hpobench/benchmarks/ml/tabular_benchmark.py index 50af2457..166f8841 100644 --- a/hpobench/benchmarks/ml_mmfb/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -6,7 +6,7 @@ from ConfigSpace.read_and_write import json as json_cs from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import metrics +from 
hpobench.dependencies.ml.ml_benchmark_template import metrics from hpobench.util.data_manager import TabularDataManager diff --git a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py similarity index 98% rename from hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py rename to hpobench/benchmarks/ml/xgboost_benchmark.py index d975857e..4a2dd2f4 100644 --- a/hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -5,7 +5,7 @@ import xgboost as xgb from ConfigSpace.hyperparameters import Hyperparameter -from hpobench.dependencies.ml_mmfb.ml_benchmark_template import MLBenchmark +from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark class XGBoostBenchmark(MLBenchmark): diff --git a/hpobench/benchmarks/ml_mmfb/__init__.py b/hpobench/benchmarks/ml_mmfb/__init__.py index 0d13c728..b826bd24 100644 --- a/hpobench/benchmarks/ml_mmfb/__init__.py +++ b/hpobench/benchmarks/ml_mmfb/__init__.py @@ -1,11 +1,11 @@ -from hpobench.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF -from hpobench.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF -from hpobench.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF -from hpobench.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ +from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ RandomForestBenchmarkMF -from hpobench.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF -from hpobench.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark -from hpobench.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF +from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF +from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark +from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF __all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', diff --git a/hpobench/container/benchmarks/ml/__init__.py b/hpobench/container/benchmarks/ml/__init__.py index e69de29b..ed2ce40f 100644 --- a/hpobench/container/benchmarks/ml/__init__.py +++ b/hpobench/container/benchmarks/ml/__init__.py @@ -0,0 +1,17 @@ +from hpobench.container.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF +from hpobench.container.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF +from hpobench.container.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF +from hpobench.container.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ + RandomForestBenchmarkMF +from hpobench.container.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF +from hpobench.container.benchmarks.ml.tabular_benchmark import TabularBenchmark +from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF + + +__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 
'HistGBBenchmarkMF', + 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', + 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', + 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', + 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', + 'TabularBenchmark', + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py b/hpobench/container/benchmarks/ml/histgb_benchmark.py similarity index 100% rename from hpobench/container/benchmarks/ml_mmfb/histgb_benchmark.py rename to hpobench/container/benchmarks/ml/histgb_benchmark.py diff --git a/hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py b/hpobench/container/benchmarks/ml/lr_benchmark.py similarity index 100% rename from hpobench/container/benchmarks/ml_mmfb/lr_benchmark.py rename to hpobench/container/benchmarks/ml/lr_benchmark.py diff --git a/hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py b/hpobench/container/benchmarks/ml/nn_benchmark.py similarity index 100% rename from hpobench/container/benchmarks/ml_mmfb/nn_benchmark.py rename to hpobench/container/benchmarks/ml/nn_benchmark.py diff --git a/hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py b/hpobench/container/benchmarks/ml/rf_benchmark.py similarity index 100% rename from hpobench/container/benchmarks/ml_mmfb/rf_benchmark.py rename to hpobench/container/benchmarks/ml/rf_benchmark.py diff --git a/hpobench/container/benchmarks/ml/svm_benchmark.py b/hpobench/container/benchmarks/ml/svm_benchmark.py index 4955f057..7547a81a 100644 --- a/hpobench/container/benchmarks/ml/svm_benchmark.py +++ b/hpobench/container/benchmarks/ml/svm_benchmark.py @@ -1,15 +1,33 @@ #!/usr/bin/python3 # -*- coding: utf-8 -*- -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ +""" Benchmark for the SVM Benchmarks from hpobench/benchmarks/ml_mmfb/svm_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient -class SupportVectorMachine(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SupportVectorMachine') - kwargs['container_name'] = kwargs.get('container_name', 'svm_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(SupportVectorMachine, self).__init__(**kwargs) +class SVMBenchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(SVMBenchmark, self).__init__(**kwargs) + + +class SVMBenchmarkMF(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkMF') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(SVMBenchmarkMF, self).__init__(**kwargs) + + +class SVMBenchmarkBB(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkBB') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(SVMBenchmarkBB, self).__init__(**kwargs) + + +__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] diff --git a/hpobench/container/benchmarks/ml/svm_benchmark_old.py 
b/hpobench/container/benchmarks/ml/svm_benchmark_old.py new file mode 100644 index 00000000..4955f057 --- /dev/null +++ b/hpobench/container/benchmarks/ml/svm_benchmark_old.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class SupportVectorMachine(AbstractBenchmarkClient): + def __init__(self, task_id: int, **kwargs): + kwargs['task_id'] = task_id + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SupportVectorMachine') + kwargs['container_name'] = kwargs.get('container_name', 'svm_benchmark') + kwargs['latest'] = kwargs.get('container_tag', '0.0.3') + super(SupportVectorMachine, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py b/hpobench/container/benchmarks/ml/tabular_benchmark.py similarity index 100% rename from hpobench/container/benchmarks/ml_mmfb/tabular_benchmark.py rename to hpobench/container/benchmarks/ml/tabular_benchmark.py diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark.py b/hpobench/container/benchmarks/ml/xgboost_benchmark.py index df475748..c82ea606 100644 --- a/hpobench/container/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml/xgboost_benchmark.py @@ -1,24 +1,41 @@ #!/usr/bin/python3 # -*- coding: utf-8 -*- -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ +""" Benchmark for the XGB Benchmarks from hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py """ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient class XGBoostBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id + def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') super(XGBoostBenchmark, self).__init__(**kwargs) -class XGBoostExtendedBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostExtendedBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(XGBoostExtendedBenchmark, self).__init__(**kwargs) +class XGBoostBenchmarkBB(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkBB') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostBenchmarkBB, self).__init__(**kwargs) + + +class XGBoostBenchmarkMF(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkMF') + kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostBenchmarkMF, self).__init__(**kwargs) + + +class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace3Benchmark') + 
kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) + + +__all__ = ['XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py new file mode 100644 index 00000000..df475748 --- /dev/null +++ b/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py @@ -0,0 +1,24 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ + +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient + + +class XGBoostBenchmark(AbstractBenchmarkClient): + def __init__(self, task_id: int, **kwargs): + kwargs['task_id'] = task_id + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') + kwargs['latest'] = kwargs.get('container_tag', '0.0.3') + super(XGBoostBenchmark, self).__init__(**kwargs) + + +class XGBoostExtendedBenchmark(AbstractBenchmarkClient): + def __init__(self, task_id: int, **kwargs): + kwargs['task_id'] = task_id + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostExtendedBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') + kwargs['latest'] = kwargs.get('container_tag', '0.0.3') + super(XGBoostExtendedBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml_mmfb/__init__.py b/hpobench/container/benchmarks/ml_mmfb/__init__.py deleted file mode 100644 index 5f5cada5..00000000 --- a/hpobench/container/benchmarks/ml_mmfb/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from hpobench.container.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmarkMF, HistGBBenchmarkBB, HistGBBenchmark -from hpobench.container.benchmarks.ml_mmfb.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF -from hpobench.container.benchmarks.ml_mmfb.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF -from hpobench.container.benchmarks.ml_mmfb.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF -from hpobench.container.benchmarks.ml_mmfb.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ - RandomForestBenchmarkMF -from hpobench.container.benchmarks.ml_mmfb.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF -from hpobench.container.benchmarks.ml_mmfb.tabular_benchmark import TabularBenchmark -from hpobench.container.benchmarks.ml_mmfb.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, \ - XGBoostBenchmarkMF - - -__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', - 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', - 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', - 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', - 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', - 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py deleted file mode 100644 index 7547a81a..00000000 --- a/hpobench/container/benchmarks/ml_mmfb/svm_benchmark.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the SVM Benchmarks from 
hpobench/benchmarks/ml_mmfb/svm_benchmark.py """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class SVMBenchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(SVMBenchmark, self).__init__(**kwargs) - - -class SVMBenchmarkMF(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(SVMBenchmarkMF, self).__init__(**kwargs) - - -class SVMBenchmarkBB(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(SVMBenchmarkBB, self).__init__(**kwargs) - - -__all__ = ['SVMBenchmark', 'SVMBenchmarkMF', 'SVMBenchmarkBB'] diff --git a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py b/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py deleted file mode 100644 index c82ea606..00000000 --- a/hpobench/container/benchmarks/ml_mmfb/xgboost_benchmark.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the XGB Benchmarks from hpobench/benchmarks/ml_mmfb/xgboost_benchmark.py """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class XGBoostBenchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostBenchmark, self).__init__(**kwargs) - - -class XGBoostBenchmarkBB(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostBenchmarkBB, self).__init__(**kwargs) - - -class XGBoostBenchmarkMF(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostBenchmarkMF, self).__init__(**kwargs) - - -class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): - def __init__(self, **kwargs): - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') - super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) - - -__all__ = ['XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] diff --git a/hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb b/hpobench/container/recipes/ml/Singularity.ml_mmfb similarity index 95% rename from hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb rename to hpobench/container/recipes/ml/Singularity.ml_mmfb index 49f9a894..cd8b3e6e 100644 --- 
a/hpobench/container/recipes/ml_mmfb/Singularity.ml_mmfb +++ b/hpobench/container/recipes/ml/Singularity.ml_mmfb @@ -22,4 +22,4 @@ VERSION v0.0.1 %runscript - python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml_mmfb $@ + python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml $@ diff --git a/hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark b/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark similarity index 96% rename from hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark rename to hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark index d128211a..16f92de8 100644 --- a/hpobench/container/recipes/ml_mmfb/Singularity.ml_tabular_benchmark +++ b/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark @@ -22,4 +22,4 @@ VERSION v0.0.1 %runscript - python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml_mmfb $@ + python -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml $@ diff --git a/hpobench/dependencies/ml_mmfb/__init__.py b/hpobench/dependencies/ml/__init__.py similarity index 100% rename from hpobench/dependencies/ml_mmfb/__init__.py rename to hpobench/dependencies/ml/__init__.py diff --git a/hpobench/dependencies/ml_mmfb/data_manager.py b/hpobench/dependencies/ml/data_manager.py similarity index 100% rename from hpobench/dependencies/ml_mmfb/data_manager.py rename to hpobench/dependencies/ml/data_manager.py diff --git a/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py similarity index 99% rename from hpobench/dependencies/ml_mmfb/ml_benchmark_template.py rename to hpobench/dependencies/ml/ml_benchmark_template.py index 3af13965..3c6fcdaf 100644 --- a/hpobench/dependencies/ml_mmfb/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -9,7 +9,7 @@ precision_score, f1_score from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.dependencies.ml_mmfb.data_manager import OpenMLDataManager +from hpobench.dependencies.ml.data_manager import OpenMLDataManager from hpobench.util.rng_helper import get_rng metrics = dict( From be641f8bbf81a8394263255e32de72daac17d62b Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 14:55:45 +0200 Subject: [PATCH 78/95] PR: Remove pybnn from the init. This would cause an error since importing stuff for the tab benchmarks would require the pybnn stuff. 
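An alternative, if the pybnn classes should stay importable from this namespace, would be to guard the import rather than drop it, the same pattern a later patch in this series applies to the XGBoost benchmark. A rough sketch only, not the change made in this commit; the class names are taken from the removed import (minus the duplicated BNNOnProteinStructure):

    # Hypothetical optional-import guard: pybnn pulls in extra dependencies that the
    # tabular/raw ML benchmarks do not need, so a failed import is silently ignored.
    try:
        from hpobench.benchmarks.ml.pybnn import BNNOnToyFunction, BNNOnProteinStructure, BNNOnYearPrediction
    except ImportError:
        pass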
--- hpobench/benchmarks/ml/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index 966d2ed8..b44482c4 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -6,8 +6,6 @@ from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF -from hpobench.benchmarks.ml.pybnn import BNNOnToyFunction, BNNOnProteinStructure, BNNOnProteinStructure, \ - BNNOnYearPrediction __all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', @@ -17,5 +15,4 @@ 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', 'TabularBenchmark', 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', - 'BNNOnToyFunction', 'BNNOnProteinStructure', 'BNNOnProteinStructure', 'BNNOnYearPrediction' ] From 7bc25bcaa153d0e8a2b97de3f58ecc043140b891 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 14:56:49 +0200 Subject: [PATCH 79/95] PR: Cleanup --- hpobench/benchmarks/ml_mmfb/__init__.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 hpobench/benchmarks/ml_mmfb/__init__.py diff --git a/hpobench/benchmarks/ml_mmfb/__init__.py b/hpobench/benchmarks/ml_mmfb/__init__.py deleted file mode 100644 index b826bd24..00000000 --- a/hpobench/benchmarks/ml_mmfb/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF -from hpobench.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF -from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF -from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ - RandomForestBenchmarkMF -from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF -from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark -from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF - - -__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', - 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', - 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', - 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', - 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', - 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] From b0d9b7f4bf626c3d5f0fa70a2c0c0ef10247bd01 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Thu, 19 Aug 2021 15:24:19 +0200 Subject: [PATCH 80/95] PR: Fix Tests --- tests/test_check_configuration.py | 1 - tests/test_server.py | 2 +- tests/test_svm.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_check_configuration.py b/tests/test_check_configuration.py index 382e9810..8d3db58f 100644 --- a/tests/test_check_configuration.py +++ b/tests/test_check_configuration.py @@ -3,7 +3,6 @@ import numpy as np import pytest -import ConfigSpace as CS from ConfigSpace import ConfigurationSpace, Configuration, \ UniformFloatHyperparameter, UniformIntegerHyperparameter, \ diff --git a/tests/test_server.py b/tests/test_server.py index d175c09a..d78cb0cc 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -24,7 +24,7 @@ def test_debug_container(): 
     set_log_level(True)
 
-    from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark
+    from hpobench.container.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark
     from hpobench.util.openml_data_manager import get_openmlcc18_taskids
 
     task_id = get_openmlcc18_taskids()[0]
diff --git a/tests/test_svm.py b/tests/test_svm.py
index a7a31307..c3acf007 100644
--- a/tests/test_svm.py
+++ b/tests/test_svm.py
@@ -1,6 +1,6 @@
 import pytest
 
-from hpobench.container.benchmarks.ml.svm_benchmark import SupportVectorMachine
+from hpobench.container.benchmarks.ml.svm_benchmark_old import SupportVectorMachine
 from hpobench.util.openml_data_manager import get_openmlcc18_taskids
 
 task_ids = get_openmlcc18_taskids()

From 59bd905370b4cec599c9fbcf6df2a40e8c1c490b Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Thu, 19 Aug 2021 21:44:53 +0200
Subject: [PATCH 81/95] Adding public URLs for tabular benchmark

---
 hpobench/util/data_manager.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py
index d390218e..d5f7e48f 100644
--- a/hpobench/util/data_manager.py
+++ b/hpobench/util/data_manager.py
@@ -41,6 +41,14 @@
 
 import hpobench
 
+tabular_multi_fidelity_urls = dict(
+    xgb="https://figshare.com/articles/dataset/XGBoost/15155919",
+    svm="https://figshare.com/articles/dataset/SupportVectorMachine/15098280",
+    lr="https://figshare.com/articles/dataset/LogisticRegression/15098283",
+    rf="https://figshare.com/articles/dataset/RandomForest/15173517",
+    nn="https://figshare.com/articles/dataset/NeuralNetwork/15156915"
+)
+
 
 class DataManager(abc.ABC, metaclass=abc.ABCMeta):
     """ Base Class for loading and managing the data.
@@ -930,14 +938,12 @@ class TabularDataManager(DataManager):
     def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None):
         super(TabularDataManager, self).__init__()
 
-        assert model in ['lr', 'svm', 'xgb'], f'Model has to be one of [lr, svm, xgb] but was {model}'
+        assert model in tabular_multi_fidelity_urls.keys(), f'Model has to be one of [lr, svm, xgb] but was {model}'
 
         self.model = model
         self.task_id = str(task_id)
 
-        url_dict = dict(xgb='https://ndownloader.figshare.com/files/29113257?private_link=c817bed4e7efc6daee91',
-                        svm='https://ndownloader.figshare.com/files/29102307?private_link=5a0929ad9b2ccd8dda58',
-                        lr='https://ndownloader.figshare.com/files/29027112?private_link=d644493a93dbab4b4ee1')
+        url_dict = tabular_multi_fidelity_urls
 
         self.url_to_use = url_dict.get(model)

From f576fb36b4c8eb40a02653d2c60bea8c6c06da26 Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Thu, 19 Aug 2021 22:27:42 +0200
Subject: [PATCH 82/95] Adding more models

---
 hpobench/benchmarks/ml/tabular_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py
index 166f8841..20dcf312 100644
--- a/hpobench/benchmarks/ml/tabular_benchmark.py
+++ b/hpobench/benchmarks/ml/tabular_benchmark.py
@@ -17,7 +17,7 @@ def __init__(self,
                  data_dir: Union[Path, str, None] = None,
                  rng: Union[int, np.random.RandomState, None] = None, **kwargs):
 
-        assert model in ['lr', 'svm', 'xgb'], f'Parameter `model` has to be one of [lr, svm, xgb] but was {model}'
+        assert model in ['lr', 'svm', 'xgb', 'rf', 'nn'], f'Parameter `model` has to be one of [lr, svm, xgb] but was {model}'
 
         self.task_id = task_id
         self.model = model

From 63f517782f86c9379dacb1cd6031098f1db2cc7d Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Fri, 20 Aug 2021 19:26:53 +0200
Subject: [PATCH 83/95] Updating figshare URLs with new public ones

---
 hpobench/util/data_manager.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py
index 603c2334..0ee6d0aa 100644
--- a/hpobench/util/data_manager.py
+++ b/hpobench/util/data_manager.py
@@ -42,11 +42,11 @@
 
 tabular_multi_fidelity_urls = dict(
-    xgb="https://figshare.com/articles/dataset/XGBoost/15155919",
-    svm="https://figshare.com/articles/dataset/SupportVectorMachine/15098280",
-    lr="https://figshare.com/articles/dataset/LogisticRegression/15098283",
-    rf="https://figshare.com/articles/dataset/RandomForest/15173517",
-    nn="https://figshare.com/articles/dataset/NeuralNetwork/15156915"
+    xgb="https://ndownloader.figshare.com/files/29469231",
+    svm="https://ndownloader.figshare.com/files/29471790",
+    lr="https://ndownloader.figshare.com/files/29470119",
+    rf="https://ndownloader.figshare.com/files/29466012",
+    nn="https://ndownloader.figshare.com/files/29467902"
 )
 
 class DataManager(abc.ABC, metaclass=abc.ABCMeta):
@@ -938,7 +938,8 @@ class TabularDataManager(DataManager):
     def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None):
         super(TabularDataManager, self).__init__()
 
-        assert model in tabular_multi_fidelity_urls.keys(), f'Model has to be one of [lr, svm, xgb] but was {model}'
+        assert model in tabular_multi_fidelity_urls.keys(), \
+            f'Model has to be one of {list(tabular_multi_fidelity_urls.keys())} but was {model}'
 
         self.model = model
         self.task_id = str(task_id)

From 53358314583e454ca0b47ca980bf68365640c4be Mon Sep 17 00:00:00 2001
From: PhMueller
Date: Fri, 20 Aug 2021 21:58:11 +0200
Subject: [PATCH 84/95] PR Fix URLs and dependencies

---
 extra_requirements/ml_mfbb.json             |  4 ++--
 hpobench/benchmarks/ml/__init__.py          |  6 +++++-
 hpobench/benchmarks/ml/tabular_benchmark.py |  4 ++--
 hpobench/util/data_manager.py               | 15 ++++++++++-----
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/extra_requirements/ml_mfbb.json b/extra_requirements/ml_mfbb.json
index db67ecd0..87de2c63 100644
--- a/extra_requirements/ml_mfbb.json
+++ b/extra_requirements/ml_mfbb.json
@@ -1,4 +1,4 @@
 {
-  "ml_tabular_benchmarks": ["pandas==1.2.4","openml==0.12.2"],
-  "ml_mfbb": ["pandas==1.2.4","scikit-learn==0.24.2","openml==0.12.2","xgboost==1.3.1"]
+  "ml_tabular_benchmarks": ["tqdm","pandas==1.2.4","scikit-learn==0.24.2","openml==0.12.2","xgboost==1.3.1"],
+  "ml_mfbb": ["tqdm","pandas==1.2.4","scikit-learn==0.24.2","openml==0.12.2","xgboost==1.3.1"]
 }
\ No newline at end of file
diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py
index b44482c4..64e399cd 100644
--- a/hpobench/benchmarks/ml/__init__.py
+++ b/hpobench/benchmarks/ml/__init__.py
@@ -5,7 +5,11 @@
     RandomForestBenchmarkMF
 from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF
 from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark
-from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF
+
+try:
+    from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF
+except ImportError:
+    pass
 
 __all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF',
diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py
index 166f8841..fa5421b2 100644
--- a/hpobench/benchmarks/ml/tabular_benchmark.py
+++ b/hpobench/benchmarks/ml/tabular_benchmark.py
@@ -16,8 +16,8 @@ def __init__(self,
                  model: str,
                  task_id: int,
                  data_dir: Union[Path, str, None] = None,
                  rng: Union[int, np.random.RandomState, None] = None, **kwargs):
-
-        assert model in ['lr', 'svm', 'xgb'], f'Parameter `model` has to be one of [lr, svm, xgb] but was {model}'
+        models = ['lr', 'svm', 'xgb', 'rf', 'nn']
+        assert model in models, f'Parameter `model` has to be one of {models} but was {model}'
 
         self.task_id = task_id
         self.model = model
diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py
index 780bb06c..cf3200fd 100644
--- a/hpobench/util/data_manager.py
+++ b/hpobench/util/data_manager.py
@@ -930,15 +930,20 @@ def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None]
         super(TabularDataManager, self).__init__()
 
-        assert model in ['lr', 'svm', 'xgb'], f'Model has to be one of [lr, svm, xgb] but was {model}'
+        url_dict = dict(
+            xgb="https://ndownloader.figshare.com/files/29469231",
+            svm="https://ndownloader.figshare.com/files/29471790",
+            lr="https://ndownloader.figshare.com/files/29470119",
+            rf="https://ndownloader.figshare.com/files/29466012",
+            nn="https://ndownloader.figshare.com/files/29467902"
+        )
+
+        assert model in url_dict.keys(), \
+            f'Model has to be one of {list(url_dict.keys())} but was {model}'
 
         self.model = model
         self.task_id = str(task_id)
 
-        url_dict = dict(xgb='https://ndownloader.figshare.com/files/29113257?private_link=c817bed4e7efc6daee91',
-                        svm='https://ndownloader.figshare.com/files/29102307?private_link=5a0929ad9b2ccd8dda58',
-                        lr='https://ndownloader.figshare.com/files/29027112?private_link=d644493a93dbab4b4ee1')
-
         self.url_to_use = url_dict.get(model)
 
         if data_dir is None:

From cf9b4ef61b24569b2e3ed76fd35800ead9f65232 Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Sun, 22 Aug 2021 01:59:20 +0200
Subject: [PATCH 85/95] Updating URL for SVM data

---
 hpobench/util/data_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py
index 0ee6d0aa..f36bc234 100644
--- a/hpobench/util/data_manager.py
+++ b/hpobench/util/data_manager.py
@@ -43,7 +43,7 @@
 tabular_multi_fidelity_urls = dict(
     xgb="https://ndownloader.figshare.com/files/29469231",
-    svm="https://ndownloader.figshare.com/files/29471790",
+    svm="https://ndownloader.figshare.com/files/30300531",
     lr="https://ndownloader.figshare.com/files/29470119",
     rf="https://ndownloader.figshare.com/files/29466012",
     nn="https://ndownloader.figshare.com/files/29467902"

From ed7d23ed76c44a45b71354a33710350d86097f6a Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Mon, 23 Aug 2021 02:08:35 +0200
Subject: [PATCH 86/95] Updating Tabular bench URLs

---
 hpobench/util/data_manager.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py
index f36bc234..eafa00ba 100644
--- a/hpobench/util/data_manager.py
+++ b/hpobench/util/data_manager.py
@@ -42,11 +42,11 @@
 
 tabular_multi_fidelity_urls = dict(
-    xgb="https://ndownloader.figshare.com/files/29469231",
-    svm="https://ndownloader.figshare.com/files/30300531",
-    lr="https://ndownloader.figshare.com/files/29470119",
-    rf="https://ndownloader.figshare.com/files/29466012",
-    nn="https://ndownloader.figshare.com/files/29467902"
+    xgb="https://ndownloader.figshare.com/files/30378972",
+    svm="https://ndownloader.figshare.com/files/30379359",
+ lr="https://ndownloader.figshare.com/files/30379038", + rf="https://ndownloader.figshare.com/files/30378930", + nn="https://ndownloader.figshare.com/files/30379005" ) class DataManager(abc.ABC, metaclass=abc.ABCMeta): From 9181bbb3608e7159edbfc16a28e1e18da2ae45a5 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 25 Aug 2021 15:47:35 +0200 Subject: [PATCH 87/95] PR Fix URLs and dependencies --- hpobench/util/data_manager.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index cf3200fd..9712ff8e 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -931,11 +931,11 @@ def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] super(TabularDataManager, self).__init__() url_dict = dict( - xgb="https://ndownloader.figshare.com/files/29469231", - svm="https://ndownloader.figshare.com/files/29471790", - lr="https://ndownloader.figshare.com/files/29470119", - rf="https://ndownloader.figshare.com/files/29466012", - nn="https://ndownloader.figshare.com/files/29467902" + xgb="https://ndownloader.figshare.com/files/30378972", + svm="https://ndownloader.figshare.com/files/30379359", + lr="https://ndownloader.figshare.com/files/30379038", + rf="https://ndownloader.figshare.com/files/30378930", + nn="https://ndownloader.figshare.com/files/30379005" ) assert model in url_dict.keys(), \ From 451ff08bd56e105ec8a633443d2314e895048494 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Wed, 25 Aug 2021 16:52:30 +0200 Subject: [PATCH 88/95] PR Fix URLs and dependencies --- ci_scripts/install.sh | 2 +- extra_requirements/tests.json | 3 ++- tests/test_whitebox.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 9ed0f5b6..d2799899 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -4,7 +4,7 @@ install_packages="" if [[ "$RUN_TESTS" == "true" ]]; then echo "Install tools for testing" - install_packages="${install_packages}xgboost,pytest,test_paramnet," + install_packages="${install_packages}xgboost,pytest,test_paramnet,test_tabular_datamanager" pip install codecov # The param net benchmark does not work with a scikit-learn version != 0.23.2. 
(See notes in the benchmark) diff --git a/extra_requirements/tests.json b/extra_requirements/tests.json index fff27ee1..0b8deb77 100644 --- a/extra_requirements/tests.json +++ b/extra_requirements/tests.json @@ -1,5 +1,6 @@ { "codestyle": ["pycodestyle","flake8","pylint"], "pytest": ["pytest>=4.6","pytest-cov"], - "test_paramnet": ["tqdm", "scikit-learn==0.23.2"] + "test_paramnet": ["tqdm", "scikit-learn==0.23.2"], + "test_tabular_datamanager": ["pyarrow"] } \ No newline at end of file diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index c3f5e0ff..bff5a77e 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -39,7 +39,7 @@ def test_whitebox_without_container_xgb(): @pytest.mark.skipif(skip_container_test, reason="Requires singularity and flask") def test_whitebox_with_container(): - from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark + from hpobench.container.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark b = Benchmark(container_name='xgboost_benchmark', task_id=167199, rng=0) From 310b11e5c938638d81dfd8ffea65d94405e27e19 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 26 Aug 2021 14:28:55 +0200 Subject: [PATCH 89/95] Updating RF benchmark URL --- hpobench/util/data_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index eafa00ba..f08ffb90 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -45,7 +45,7 @@ xgb="https://ndownloader.figshare.com/files/30378972", svm="https://ndownloader.figshare.com/files/30379359", lr="https://ndownloader.figshare.com/files/30379038", - rf="https://ndownloader.figshare.com/files/30378930", + rf="https://ndownloader.figshare.com/files/30469089", nn="https://ndownloader.figshare.com/files/30379005" ) From f01286b56d7066614c212dbff13ad7ce12bd4432 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 26 Aug 2021 15:30:23 +0200 Subject: [PATCH 90/95] Updating XGB URL --- hpobench/util/data_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index f08ffb90..1a36025f 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -42,7 +42,7 @@ tabular_multi_fidelity_urls = dict( - xgb="https://ndownloader.figshare.com/files/30378972", + xgb="https://ndownloader.figshare.com/files/30469920", svm="https://ndownloader.figshare.com/files/30379359", lr="https://ndownloader.figshare.com/files/30379038", rf="https://ndownloader.figshare.com/files/30469089", From 12b72b1d4d7be78798dda4e4a07f7f5c83463841 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Fri, 27 Aug 2021 19:39:29 +0200 Subject: [PATCH 91/95] PR Fix tests --- ci_scripts/install.sh | 2 +- extra_requirements/tests.json | 2 +- tests/test_whitebox.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index d2799899..03f3f185 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -4,7 +4,7 @@ install_packages="" if [[ "$RUN_TESTS" == "true" ]]; then echo "Install tools for testing" - install_packages="${install_packages}xgboost,pytest,test_paramnet,test_tabular_datamanager" + install_packages="${install_packages}xgboost,pytest,test_paramnet,test_tabular_datamanager," pip install codecov # The param net benchmark does not work with a scikit-learn version != 0.23.2. 
(See notes in the benchmark) diff --git a/extra_requirements/tests.json b/extra_requirements/tests.json index 0b8deb77..6c27be97 100644 --- a/extra_requirements/tests.json +++ b/extra_requirements/tests.json @@ -2,5 +2,5 @@ "codestyle": ["pycodestyle","flake8","pylint"], "pytest": ["pytest>=4.6","pytest-cov"], "test_paramnet": ["tqdm", "scikit-learn==0.23.2"], - "test_tabular_datamanager": ["pyarrow"] + "test_tabular_datamanager": ["pyarrow", "fastparquet"] } \ No newline at end of file diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index bff5a77e..35a9a940 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -32,8 +32,8 @@ def test_whitebox_without_container_xgb(): result_dict = b.objective_function_test(configuration, fidelity=dict(n_estimators=n_estimator), rng=0) test_loss = result_dict['function_value'] - assert np.isclose(train_loss, 0.0223, atol=0.001) - assert np.isclose(valid_loss, 0.4234, atol=0.001) + assert np.isclose(train_loss, 0.02678, atol=0.001) + assert np.isclose(valid_loss, 0.49549, atol=0.001) assert np.isclose(test_loss, 0.43636, atol=0.001) From 41aa96bf657be97693d9f31290d586957b84c749 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Fri, 27 Aug 2021 19:40:49 +0200 Subject: [PATCH 92/95] New Urls --- hpobench/util/data_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index 9712ff8e..a2e33121 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -931,10 +931,10 @@ def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] super(TabularDataManager, self).__init__() url_dict = dict( - xgb="https://ndownloader.figshare.com/files/30378972", + xgb="https://ndownloader.figshare.com/files/30469920", svm="https://ndownloader.figshare.com/files/30379359", lr="https://ndownloader.figshare.com/files/30379038", - rf="https://ndownloader.figshare.com/files/30378930", + rf="https://ndownloader.figshare.com/files/30469089", nn="https://ndownloader.figshare.com/files/30379005" ) From c23e3545c1931fa9bd17d0d16b81b6db15de0937 Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 30 Aug 2021 17:40:52 +0200 Subject: [PATCH 93/95] Trigger Rebuild. --- ci_scripts/install.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 03f3f185..20652052 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -65,7 +65,6 @@ if [[ "$USE_SINGULARITY" == "true" ]]; then sudo make -C builddir install cd .. - install_packages="${install_packages}singularity," else echo "Skip installing Singularity" fi From 1fa684c3e0a86b46c176a44db7ab84427b3603db Mon Sep 17 00:00:00 2001 From: PhMueller Date: Mon, 30 Aug 2021 18:15:40 +0200 Subject: [PATCH 94/95] Fix Dataloader Assertion --- ci_scripts/install.sh | 2 +- tests/test_data_manager.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 20652052..097d00d0 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -63,7 +63,7 @@ if [[ "$USE_SINGULARITY" == "true" ]]; then ./mconfig && \ make -C builddir && \ sudo make -C builddir install - + install_packages="${install_packages}placeholder," cd .. 
 else
     echo "Skip installing Singularity"
diff --git a/tests/test_data_manager.py b/tests/test_data_manager.py
index fd57b627..7e32ce84 100644
--- a/tests/test_data_manager.py
+++ b/tests/test_data_manager.py
@@ -108,7 +108,7 @@ def test_tabular_datamanager():
 
     table, meta_data = dm.load()
 
-    assert (hpobench.config_file.data_dir / "TabularData" / str(3) / f'lr_3_data.parquet.gzip').exists()
-    assert (hpobench.config_file.data_dir / "TabularData" / str(3) / f'lr_3_metadata.json').exists()
+    assert (hpobench.config_file.data_dir / "TabularData" / 'lr' / str(3) / f'lr_3_data.parquet.gzip').exists()
+    assert (hpobench.config_file.data_dir / "TabularData" / 'lr' / str(3) / f'lr_3_metadata.json').exists()
 
     table_2, meta_data_2 = dm.load()

From 6394521080fa51d7df952129972905398f6a26fe Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Thu, 7 Oct 2021 01:18:42 +0200
Subject: [PATCH 95/95] inference cost key fix

---
 hpobench/benchmarks/ml/tabular_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py
index 72e5fb31..c5525bf5 100644
--- a/hpobench/benchmarks/ml/tabular_benchmark.py
+++ b/hpobench/benchmarks/ml/tabular_benchmark.py
@@ -163,7 +163,7 @@ def _objective(
         metric_str = ', '.join(list(metrics.keys()))
         assert metric in list(metrics.keys()), f"metric not found among: {metric_str}"
         score_key = f"{evaluation}_scores"
-        cost_key = f"{evaluation}_scores"
+        cost_key = f"{evaluation}_costs"
         key_path = dict()
        for name in self.configuration_space.get_hyperparameter_names():
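
Taken together, patches 81-95 settle the public entry points of the tabular multi-fidelity benchmarks: TabularDataManager downloads one figshare archive per surrogate model ('lr', 'svm', 'xgb', 'rf', 'nn') and caches it below data_dir / 'TabularData' / <model> / <task_id>, and TabularBenchmark accepts the same five model names. A minimal usage sketch follows; it is not part of the patch series and only assumes what the diffs above show (the constructor arguments, dm.load() returning a table plus its metadata, the configuration_space attribute used in _objective, and the 'function_value' result key seen in tests/test_whitebox.py). The fidelity handling of objective_function and the exact type of the returned table are not pinned down by these diffs and may differ in the released code; reading the parquet file needs pyarrow or fastparquet, as added to extra_requirements/tests.json.

# Usage sketch, assuming the interfaces shown in the patches above (not a verified script).
from hpobench.util.data_manager import TabularDataManager
from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark

# Download (or reuse a cached copy of) the LR table for OpenML task 3, the same
# combination exercised in tests/test_data_manager.py; 'rf' and 'nn' are accepted
# as well after patches 82/84.
dm = TabularDataManager(model='lr', task_id=3)
table, metadata = dm.load()   # contents of lr_3_data.parquet.gzip and lr_3_metadata.json

# Query the tabular benchmark on the same collection.
benchmark = TabularBenchmark(model='lr', task_id=3)
config = benchmark.configuration_space.sample_configuration()
result = benchmark.objective_function(configuration=config, rng=0)
print(result['function_value'])

When the files are not cached yet, TabularDataManager resolves the download URL from the per-model dictionary fixed in patches 90/92 and stores the files under hpobench.config_file.data_dir / 'TabularData' / <model> / <task_id>, which is exactly the layout asserted in tests/test_data_manager.py after patch 94.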