Skip to content

Commit 58d6801

Browse files
authored
Merge pull request #189 from automl/development
Version 1.3.2
2 parents e1f8f04 + edeabbb commit 58d6801

21 files changed

+1325
-61
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# Version 1.3.2
2+
3+
## Features
4+
- Allow Importance and ablation path analysis for multi-objective runs.
5+
16
# Version 1.3.1
27

38
## Quality of Life

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# are usually completed in github actions.
33

44
SHELL := /bin/bash
5-
VERSION := 1.3.1
5+
VERSION := 1.3.2
66

77
NAME := DeepCAVE
88
PACKAGE_NAME := deepcave

deepcave/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"Source Code": "https://github.com/automl/deepcave",
4545
}
4646
copyright = f"Copyright {datetime.date.today().strftime('%Y')}, {author}"
47-
version = "1.3.1"
47+
version = "1.3.2"
4848

4949
_exec_file = sys.argv[0]
5050
_exec_files = ["server.py", "worker.py", "sphinx-build"]

deepcave/evaluators/ablation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def _ablation(
245245
max_hp_difference = -np.inf
246246

247247
for hp in hp_it:
248-
if hp in hp in incumbent_config.keys() and hp in self.default_config.keys():
248+
if hp in incumbent_config.keys() and hp in self.default_config.keys():
249249
config_copy = copy.copy(self.default_config)
250250
config_copy[hp] = incumbent_config[hp]
251251

deepcave/evaluators/mo_ablation.py

+350
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
# Copyright 2021-2024 The DeepCAVE Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# noqa: D400
16+
"""
17+
# Ablation Paths
18+
19+
This module evaluates the ablation paths.
20+
21+
Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
22+
Starting from a default configuration, the default configuration is iteratively changed to the
23+
incumbent configuration by changing one hyperparameter at a time, choosing the
24+
hyperparameter that leads to the largest improvement in the objective function at each step.
25+
26+
## Classes:
27+
- Ablation: Provide an evaluator of the ablation paths.
28+
"""
29+
30+
from typing import Any, List, Optional, Tuple, Union
31+
32+
import copy
33+
34+
import numpy as np
35+
import pandas as pd
36+
37+
from deepcave.evaluators.ablation import Ablation
38+
from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate
39+
from deepcave.runs import AbstractRun
40+
from deepcave.runs.objective import Objective
41+
from deepcave.utils.multi_objective_importance import get_weightings
42+
43+
44+
class MOAblation(Ablation):
    """
    Provide an evaluator of the ablation paths.

    Override: Multi-Objective case

    Properties
    ----------
    run : AbstractRun
        The run to analyze.
    cs : ConfigurationSpace
        The configuration space of the run.
    hp_names : List[str]
        A list of the hyperparameter names.
    performances : Optional[Dict[Any, Any]]
        A dictionary containing the performances for each HP.
    improvements : Optional[Dict[Any, Any]]
        A dictionary containing the improvements over the respective previous step for each HP.
    objectives : Optional[Union[Objective, List[Objective]]]
        The objective(s) of the run.
    default_config : Configurations
        The default configuration of this configuration space.
        Gets changed step by step towards the incumbent configuration.
    """

    def __init__(self, run: AbstractRun):
        super().__init__(run)
        # One surrogate model per objective; filled by calculate().
        self.models: List = []
        # Accumulated ablation results over all weightings.
        self.df_importances = pd.DataFrame([])

    def get_importances(self) -> str:
        """
        Return the importance scores as a JSON string.

        Returns
        -------
        str
            JSON representation of the importances dataframe, containing the
            hyperparameter names and the corresponding importance scores and
            variances.

        Raises
        ------
        RuntimeError
            If the importance scores have not been calculated.
        """
        if self.df_importances is None:
            raise RuntimeError("Importance scores must be calculated first.")

        return self.df_importances.to_json()

    def predict(self, cfg: list[Any], weighting: np.ndarray) -> Tuple[float, float]:
        """
        Predict the performance of the input configuration.

        The model results are weighted by the input weightings and summed.

        Parameters
        ----------
        cfg : list[Any]
            The encoded configuration.
        weighting : np.ndarray
            The weightings, one entry per objective model.

        Returns
        -------
        mean : float
            The mean of the weighted sum of predictions.
        var : float
            The variance of the weighted sum of predictions.
        """
        mean, var = 0, 0
        for model, w in zip(self.models, weighting):
            pred, var_ = model.predict(np.array([cfg]))
            mean += w * pred[0]
            var += w * var_[0]
        return mean, var

    def calculate(
        self,
        objectives: Optional[Union[Objective, List[Objective]]],  # noqa
        budget: Optional[Union[int, float]] = None,  # noqa
        n_trees: int = 50,  # noqa
        seed: int = 0,  # noqa
    ) -> None:
        """
        Calculate the MO ablation path performances and improvements.

        Parameters
        ----------
        objectives : Optional[Union[Objective, List[Objective]]]
            The objective(s) to be considered.
        budget : Optional[Union[int, float]]
            The budget to be considered. If None, all budgets of the run are considered.
            Default is None.
        n_trees : int
            The number of trees for the surrogate model.
            Default is 50.
        seed : int
            The seed for the surrogate model.
            Default is 0.
        """
        assert isinstance(objectives, list)
        for objective in objectives:
            assert isinstance(objective, Objective)

        df = self.run.get_encoded_data(objectives, budget, specific=True, include_config_ids=True)

        # Obtain all configurations with their costs
        df = df.dropna(subset=[obj.name for obj in objectives])
        X = df[list(self.run.configspace.keys())].to_numpy()

        # Normalize each objective to [0, 1] so they can be combined by weighting
        objectives_normed = list()
        for obj in objectives:
            normed = obj.name + "_normed"
            df[normed] = (df[obj.name] - df[obj.name].min()) / (
                df[obj.name].max() - df[obj.name].min()
            )

            # Flip so that lower is always better
            if obj.optimize == "upper":
                df[normed] = 1 - df[normed]
            objectives_normed.append(normed)

            # Train one surrogate model per objective
            Y = df[normed].to_numpy()
            model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
            model._fit(X, Y)
            self.models.append(model)

        weightings = get_weightings(objectives_normed, df)

        # Calculate importance for each weighting generated from the pareto efficient points
        for w in weightings:
            df_res = self.calculate_ablation_path(df, objectives_normed, w, budget)
            if df_res is None:
                # Ablation was aborted: emit an all-zero result so callers still
                # get a frame with one row per HP plus the default row.
                columns = ["hp_name", "importance", "variance", "new_performance", "weight"]
                self.df_importances = pd.DataFrame(
                    0, index=np.arange(len(self.hp_names) + 1), columns=columns
                )
                self.df_importances["hp_name"] = ["Default"] + self.hp_names
                return
            df_res["weight"] = w[0]
            self.df_importances = pd.concat([self.df_importances, df_res])
        self.df_importances = self.df_importances.reset_index(drop=True)

    def calculate_ablation_path(
        self,
        df: pd.DataFrame,
        objectives_normed: List[str],
        weighting: np.ndarray,
        budget: Optional[Union[int, float]],
    ) -> Optional[pd.DataFrame]:
        """
        Calculate the ablation path performances.

        Parameters
        ----------
        df : pd.DataFrame
            Dataframe with encoded data.
        objectives_normed : List[str]
            The normed objective names to be considered.
        weighting : np.ndarray
            The weighting of the objective values.
        budget : Optional[Union[int, float]]
            The budget to be considered. If None, all budgets of the run are considered.
            Default is None.

        Returns
        -------
        Optional[pd.DataFrame]
            Dataframe with results of the ablation calculation, or None if the
            predicted incumbent is worse than the predicted default and the
            calculation is aborted.
        """
        # Get the incumbent configuration: the one minimizing the weighted sum
        # of the normed objectives
        incumbent_cfg_id = np.argmin(
            sum(df[obj] * w for obj, w in zip(objectives_normed, weighting))
        )
        incumbent_config = self.run.get_config(df.iloc[incumbent_cfg_id]["config_id"])

        # Get the default configuration
        self.default_config = self.cs.get_default_configuration()
        default_encode = self.run.encode_config(self.default_config, specific=True)

        # Obtain the predicted cost of the default and incumbent configuration
        def_cost, def_std = self.predict(default_encode, weighting)
        inc_cost, _ = self.predict(
            self.run.encode_config(incumbent_config, specific=True), weighting
        )

        if inc_cost > def_cost:
            self.logger.warning(
                "The predicted incumbent objective is worse than the predicted default "
                f"objective for budget: {budget}. Aborting ablation path calculation."
            )
            return None
        else:
            # Copy the hps names as to not remove objects from the original list
            hp_it = self.hp_names.copy()
            df_abl = pd.DataFrame([])
            df_abl = pd.concat(
                [
                    df_abl,
                    pd.DataFrame(
                        {
                            "hp_name": "Default",
                            "importance": 0,
                            "variance": def_std,
                            "new_performance": def_cost,
                        },
                        index=[0],
                    ),
                ]
            )

            for i in range(len(hp_it)):
                # Get the results of the current ablation iteration
                continue_ablation, max_hp, max_hp_cost, max_hp_std = self.ablation(
                    budget, incumbent_config, def_cost, hp_it, weighting
                )

                if not continue_ablation:
                    break

                # Improvement of this step over the previous performance
                diff = def_cost - max_hp_cost
                def_cost = max_hp_cost

                df_abl = pd.concat(
                    [
                        df_abl,
                        pd.DataFrame(
                            {
                                "hp_name": max_hp,
                                "importance": diff,
                                "variance": max_hp_std,
                                "new_performance": max_hp_cost,
                            },
                            index=[i + 1],
                        ),
                    ]
                )

                # Remove the current best hp for keeping the order right
                hp_it.remove(max_hp)
            return df_abl.reset_index(drop=True)

    def ablation(
        self,
        budget: Optional[Union[int, float]],
        incumbent_config: Any,
        def_cost: Any,
        hp_it: List[str],
        weighting: np.ndarray[Any, Any],
    ) -> Tuple[Any, Any, Any, Any]:
        """
        Calculate the ablation importance for each hyperparameter.

        Parameters
        ----------
        budget: Optional[Union[int, float]]
            The budget of the run.
        incumbent_config: Any
            The incumbent configuration.
        def_cost: Any
            The default cost.
        hp_it: List[str]
            A list of the HPs that still have to be looked at.
        weighting : np.ndarray[Any, Any]
            The weighting of the objective values.

        Returns
        -------
        Tuple[Any, Any, Any, Any]
            continue_ablation, max_hp, max_hp_performance, max_hp_std
        """
        max_hp = ""
        # Only hyperparameters that strictly improve the cost are accepted
        max_hp_difference = 0

        for hp in hp_it:
            if hp in incumbent_config.keys() and hp in self.default_config.keys():
                config_copy = copy.copy(self.default_config)
                config_copy[hp] = incumbent_config[hp]

                new_cost, _ = self.predict(
                    self.run.encode_config(config_copy, specific=True), weighting
                )
                difference = def_cost - new_cost

                # Check for the maximum difference hyperparameter in this round
                if difference > max_hp_difference:
                    max_hp = hp
                    max_hp_difference = difference
            else:
                continue
        hp_count = len(list(self.cs.keys()))
        if max_hp != "":
            # For the maximum impact hyperparameter, switch the default with the incumbent value
            self.default_config[max_hp] = incumbent_config[max_hp]
            max_hp_cost, max_hp_std = self.predict(
                self.run.encode_config(self.default_config, specific=True), weighting
            )
            return True, max_hp, max_hp_cost, max_hp_std
        else:
            self.logger.info(
                f"End ablation at step {hp_count - len(hp_it) + 1}/{hp_count} "
                f"for budget {budget} (remaining hyperparameters not activate in incumbent or "
                "default configuration)."
            )
            return False, None, None, None

0 commit comments

Comments
 (0)