From 6db92718414e2b90d00db867bceb9d1edee8534d Mon Sep 17 00:00:00 2001 From: Richard Zhang Date: Mon, 26 Feb 2024 13:01:30 -0800 Subject: [PATCH] Add minimum positive reward to boost reward signal upon convergence. PiperOrigin-RevId: 610501089 --- .../_src/algorithms/ensemble/ensemble_designer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/vizier/_src/algorithms/ensemble/ensemble_designer.py b/vizier/_src/algorithms/ensemble/ensemble_designer.py index c27418f3e..b481982a9 100644 --- a/vizier/_src/algorithms/ensemble/ensemble_designer.py +++ b/vizier/_src/algorithms/ensemble/ensemble_designer.py @@ -44,6 +44,10 @@ class ObjectiveRewardGenerator: default=0.0, validator=[attrs.validators.instance_of(float), attrs.validators.ge(0)], ) + min_positive_reward: float = attrs.field( + default=0.01, + validator=[attrs.validators.instance_of(float), attrs.validators.ge(0)], + ) # Arguments passed to the hypervolume converter. reference_value: Optional[np.ndarray] = attrs.field( default=None, @@ -97,9 +101,12 @@ def curve_generator() -> analyzers.StatefulCurveConverter: + self.reward_regularization * objective_curve.ys[:, idx + 1] ) if np.isfinite(regularized_reward): - rewards.append( - max(self.min_reward, float(np.squeeze(regularized_reward))) - ) + final_reward = float(np.squeeze(regularized_reward)) + # If final_reward is positive, set it to at least min_positive_reward. + if final_reward > 1e-8: + final_reward = max(final_reward, self.min_positive_reward) + final_reward = max(self.min_reward, final_reward) + rewards.append(final_reward) else: rewards.append(self.min_reward)