From 030564ec8969e09c62bf20e09513f1f6fef56d79 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Mon, 27 Jan 2025 11:33:26 -0800 Subject: [PATCH 01/16] [controller] Set rebalance preference to prioritize evenness when creating the controller cluster. Also set capacity keys to enable top-state even distribution --- .../venice/controller/ZkHelixAdminClient.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index d08c8c551a7..b1f667c72ea 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -7,6 +7,7 @@ import com.linkedin.venice.utils.RetryUtils; import io.tehuti.metrics.MetricsRepository; import java.time.Duration; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -99,6 +100,32 @@ public void createVeniceControllerCluster() { // choose proper instance to hold the replica. clusterConfig.setTopologyAwareEnabled(false); + // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost + // of rebalancing for the controller is cheap as it is stateless. + Map globalRebalancePreference = new HashMap<>(); + globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, 10); + globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, 1); + // This should be turned off, so it doesn't overpower other constraint calculations + globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, 0); + clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); + + String resourceCapacityKey = "cluster_resource_weight"; + List instanceCapacityKeys = new ArrayList<>(); + instanceCapacityKeys.add(resourceCapacityKey); + clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); + + // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high + // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes the + // current top-state distribution into account when rebalancing. + Map defaultInstanceCapacityMap = new HashMap<>(); + defaultInstanceCapacityMap.put(resourceCapacityKey, 10000); + clusterConfig.setDefaultInstanceCapacityMap(defaultInstanceCapacityMap); + + // This is how much weight each resource in a cluster has + Map defaultPartitionWeightMap = new HashMap<>(); + defaultPartitionWeightMap.put(resourceCapacityKey, 100); + clusterConfig.setDefaultPartitionWeightMap(defaultPartitionWeightMap); + updateClusterConfigs(controllerClusterName, clusterConfig); helixAdmin.addStateModelDef(controllerClusterName, LeaderStandbySMD.name, LeaderStandbySMD.build()); From 146798efcaf4ae4e824b847a5efc2b48de2aad3b Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Tue, 28 Jan 2025 17:04:07 -0800 Subject: [PATCH 02/16] Move everything to config --- .../com/linkedin/venice/ConfigConstants.java | 2 + .../java/com/linkedin/venice/ConfigKeys.java | 33 ++++++++++++++++ .../VeniceControllerClusterConfig.java | 38 +++++++++++++++++++ .../VeniceControllerMultiClusterConfig.java | 20 ++++++++++ .../venice/controller/ZkHelixAdminClient.java | 22 +++++++---- 5 files changed, 108 insertions(+), 7 deletions(-) diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java index 41f658db65e..5bce58bf47a 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java @@ -20,6 +20,8 @@ public class ConfigConstants { public static final long DEFAULT_PUSH_STATUS_STORE_HEARTBEAT_EXPIRATION_TIME_IN_SECONDS = TimeUnit.MINUTES.toSeconds(10); + public static final String DEFAULT_HELIX_RESOURCE_CAPACITY_KEY = "cluster_resource_weight"; + /** * End of controller config default value */ diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java index 8aacd4e5daa..5686369872b 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java @@ -2398,4 +2398,37 @@ private ConfigKeys() { * Default: {@value com.linkedin.venice.meta.NameRepository#DEFAULT_MAXIMUM_ENTRY_COUNT} */ public static final String NAME_REPOSITORY_MAX_ENTRY_COUNT = "name.repository.max.entry.count"; + + /** + * Specifies the value to use for Helix's rebalance preference for evenness when using Waged. + * The default value is 1. + */ + public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS = + "controller.helix.rebalance.preference.evenness"; + + /** + * Specifies the value to use for Helix's rebalance preference for less movement when using Waged. + * The default value is 1. + */ + public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT = + "controller.helix.rebalance.preference.less.movement"; + + /** + * Indicates whether to enable force baseline convergence in Helix's rebalance preference when using Waged. + * Default is false. + */ + public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE = + "controller.helix.rebalance.preference.enable.force.baseline.converge"; + + /** + * Specifies the capacity a controller instance can handle, determined by + * {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. Default is 10000. + */ + public static final String CONTROLLER_HELIX_INSTANCE_CAPACITY = "controller.helix.instance.capacity"; + + /** + * Specifies the weight of each Helix resource. The maximum weight per instance is determined by + * {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. Default is 100. + */ + public static final String CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT = "controller.helix.default.instance.capacity"; } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java index ca516b90fcd..a66762925dd 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java @@ -57,6 +57,11 @@ import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_PROCESSOR_PACKAGE; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_SOURCES; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_PROVIDER; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL; import static com.linkedin.venice.ConfigKeys.CONTROLLER_INSTANCE_TAG_LIST; import static com.linkedin.venice.ConfigKeys.CONTROLLER_JETTY_CONFIG_OVERRIDE_PREFIX; @@ -545,6 +550,12 @@ public class VeniceControllerClusterConfig { private Set pushJobUserErrorCheckpoints; private boolean isHybridStorePartitionCountUpdateEnabled; + private final int helixRebalancePreferenceEvenness; + private final int helixRebalancePreferenceLessMovement; + private final boolean helixRebalancePreferenceEnableForceBaselineConverge; + private final int helixInstanceCapacity; + private final int helixResourceCapacityWeight; + public VeniceControllerClusterConfig(VeniceProperties props) { this.props = props; this.clusterName = props.getString(CLUSTER_NAME); @@ -996,6 +1007,13 @@ public VeniceControllerClusterConfig(VeniceProperties props) { this.pushJobUserErrorCheckpoints = parsePushJobUserErrorCheckpoints(props); this.isHybridStorePartitionCountUpdateEnabled = props.getBoolean(ConfigKeys.CONTROLLER_ENABLE_HYBRID_STORE_PARTITION_COUNT_UPDATE, false); + + this.helixRebalancePreferenceEvenness = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, 1); + this.helixRebalancePreferenceLessMovement = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1); + this.helixRebalancePreferenceEnableForceBaselineConverge = + props.getBoolean(CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE, false); + this.helixInstanceCapacity = props.getInt(CONTROLLER_HELIX_INSTANCE_CAPACITY, 10000); + this.helixResourceCapacityWeight = props.getInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, 100); } public VeniceProperties getProps() { @@ -1833,4 +1851,24 @@ static Set parsePushJobUserErrorCheckpoints(VeniceProperties public Set getPushJobUserErrorCheckpoints() { return pushJobUserErrorCheckpoints; } + + public int getHelixRebalancePreferenceEvenness() { + return helixRebalancePreferenceEvenness; + } + + public int getHelixRebalancePreferenceLessMovement() { + return helixRebalancePreferenceLessMovement; + } + + public boolean isHelixRebalancePreferenceForceBaselineConvergeEnabled() { + return helixRebalancePreferenceEnableForceBaselineConverge; + } + + public int getHelixInstanceCapacity() { + return helixInstanceCapacity; + } + + public int getHelixResourceCapacityWeight() { + return helixResourceCapacityWeight; + } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java index 0de71246bae..6ee6e99f7ae 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java @@ -302,4 +302,24 @@ public long getServiceDiscoveryRegistrationRetryMS() { public List getControllerInstanceTagList() { return getCommonConfig().getControllerInstanceTagList(); } + + public int getHelixRebalancePreferenceEvenness() { + return getCommonConfig().getHelixRebalancePreferenceEvenness(); + } + + public int getHelixRebalancePreferenceLessMovement() { + return getCommonConfig().getHelixRebalancePreferenceLessMovement(); + } + + public boolean isHelixRebalancePreferenceForceBaselineConvergeEnabled() { + return getCommonConfig().isHelixRebalancePreferenceForceBaselineConvergeEnabled(); + } + + public int getHelixInstanceCapacity() { + return getCommonConfig().getHelixInstanceCapacity(); + } + + public int getHelixResourceCapacityWeight() { + return getCommonConfig().getHelixResourceCapacityWeight(); + } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index b1f667c72ea..5d9b464850f 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -1,5 +1,7 @@ package com.linkedin.venice.controller; +import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; + import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.exceptions.VeniceRetriableException; import com.linkedin.venice.helix.ZkClientFactory; @@ -103,27 +105,33 @@ public void createVeniceControllerCluster() { // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost // of rebalancing for the controller is cheap as it is stateless. Map globalRebalancePreference = new HashMap<>(); - globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, 10); - globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, 1); + globalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, + commonConfig.getHelixRebalancePreferenceEvenness()); + globalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, + commonConfig.getHelixRebalancePreferenceLessMovement()); // This should be turned off, so it doesn't overpower other constraint calculations - globalRebalancePreference.put(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, 0); + int forceBaseLineConverge = commonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled() ? 1 : 0; + globalRebalancePreference + .put(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, forceBaseLineConverge); clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); - String resourceCapacityKey = "cluster_resource_weight"; List instanceCapacityKeys = new ArrayList<>(); - instanceCapacityKeys.add(resourceCapacityKey); + instanceCapacityKeys.add(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes the // current top-state distribution into account when rebalancing. Map defaultInstanceCapacityMap = new HashMap<>(); - defaultInstanceCapacityMap.put(resourceCapacityKey, 10000); + defaultInstanceCapacityMap.put(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixInstanceCapacity()); clusterConfig.setDefaultInstanceCapacityMap(defaultInstanceCapacityMap); // This is how much weight each resource in a cluster has Map defaultPartitionWeightMap = new HashMap<>(); - defaultPartitionWeightMap.put(resourceCapacityKey, 100); + defaultPartitionWeightMap + .put(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixResourceCapacityWeight()); clusterConfig.setDefaultPartitionWeightMap(defaultPartitionWeightMap); updateClusterConfigs(controllerClusterName, clusterConfig); From ad05e91bbf80e28b4a27cbebf38266fa7029ee24 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Tue, 28 Jan 2025 21:56:07 -0800 Subject: [PATCH 03/16] Add unit and integration tests and fix bug where config wasn't being updated properly --- .../venice/controller/TestHAASController.java | 59 +++++++++++++++++++ .../venice/controller/VeniceHelixAdmin.java | 4 ++ .../venice/controller/ZkHelixAdminClient.java | 16 ++--- .../controller/TestZkHelixAdminClient.java | 47 +++++++++++++++ 4 files changed, 116 insertions(+), 10 deletions(-) diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java index 47e53e776e2..8f2569cfbec 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java @@ -1,5 +1,6 @@ package com.linkedin.venice.controller; +import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static com.linkedin.venice.utils.TestUtils.assertCommand; import static com.linkedin.venice.utils.TestUtils.shutdownExecutor; import static com.linkedin.venice.utils.TestUtils.waitForNonDeterministicAssertion; @@ -16,6 +17,8 @@ import com.linkedin.venice.ConfigKeys; import com.linkedin.venice.controllerapi.JobStatusQueryResponse; import com.linkedin.venice.controllerapi.NewStoreResponse; +import com.linkedin.venice.helix.SafeHelixDataAccessor; +import com.linkedin.venice.helix.SafeHelixManager; import com.linkedin.venice.integration.utils.HelixAsAServiceWrapper; import com.linkedin.venice.integration.utils.ServiceFactory; import com.linkedin.venice.integration.utils.VeniceClusterWrapper; @@ -31,6 +34,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -40,6 +44,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.helix.HelixAdmin; +import org.apache.helix.PropertyKey; import org.apache.helix.cloud.constants.CloudProvider; import org.apache.helix.constants.InstanceConstants; import org.apache.helix.manager.zk.ZKHelixManager; @@ -255,6 +260,60 @@ public void testTransitionToHAASControllerAsStorageClusterLeader() { } } + @Test(timeOut = 60 * Time.MS_PER_SECOND) + public void testRebalancePreferenceAndCapacityKeys() { + try (VeniceClusterWrapper venice = ServiceFactory.getVeniceCluster(0, 0, 0, 1); + HelixAsAServiceWrapper helixAsAServiceWrapper = startAndWaitForHAASToBeAvailable(venice.getZk().getAddress())) { + String controllerClusterName = "venice-controllers"; + + int helixRebalancePreferenceEvenness = 10; + int helixRebalancePreferenceLessMovement = 2; + boolean helixRebalancePreferenceEnableForceBaselineConverge = true; + int helixInstanceCapacity = 1000; + int helixResourceCapacityWeight = 10; + + Properties clusterProperties = (Properties) enableControllerAndStorageClusterHAASProperties.clone(); + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceEnableForceBaselineConverge); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + + VeniceControllerWrapper controllerWrapper = venice.addVeniceController(clusterProperties); + + VeniceHelixAdmin veniceHelixAdmin = controllerWrapper.getVeniceHelixAdmin(); + + SafeHelixManager helixManager = veniceHelixAdmin.getHelixManager(); + SafeHelixDataAccessor helixDataAccessor = helixManager.getHelixDataAccessor(); + PropertyKey.Builder propertyKeyBuilder = new PropertyKey.Builder(controllerClusterName); + ClusterConfig clusterConfig = helixDataAccessor.getProperty(propertyKeyBuilder.clusterConfig()); + + Map globalRebalancePreference = + clusterConfig.getGlobalRebalancePreference(); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), + helixRebalancePreferenceEvenness); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), + helixRebalancePreferenceLessMovement); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + 1); + + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + assertEquals((int) defaultInstanceCapacityMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixInstanceCapacity); + + Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + assertEquals( + (int) defaultPartitionWeightMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixResourceCapacityWeight); + } + } + private static class InitTask implements Callable { private final HelixAdminClient client; private final HashMap helixClusterProperties; diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java index 2e401fbb228..5e185fc357b 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java @@ -8777,4 +8777,8 @@ public void setPushJobDetailsStoreClient(AvroSpecificStoreClient helixClusterProperties = new HashMap<>(clusterConfig.getRecord().getSimpleFields()); - helixAdmin.setConfig(configScope, helixClusterProperties); + helixConfigAccessor.setClusterConfig(clusterName, clusterConfig); } /** @@ -261,10 +260,7 @@ public void updateClusterConfigs(String clusterName, ClusterConfig clusterConfig */ @Override public void updateRESTConfigs(String clusterName, RESTConfig restConfig) { - HelixConfigScope configScope = - new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.REST).forCluster(clusterName).build(); - Map helixRestProperties = new HashMap<>(restConfig.getRecord().getSimpleFields()); - helixAdmin.setConfig(configScope, helixRestProperties); + helixConfigAccessor.setRESTConfig(clusterName, restConfig); } /** diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index 66d7c5dedfd..0a908870ba8 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -1,5 +1,6 @@ package com.linkedin.venice.controller; +import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyString; @@ -221,4 +222,50 @@ public void testUpdateRESTConfigs() { zkHelixAdminClient.updateRESTConfigs(clusterName, restConfig); } + + @Test + public void testRebalancePreferenceAndCapacityKeys() { + String clusterName = "test-cluster"; + VeniceControllerClusterConfig mockClusterConfig = mock(VeniceControllerClusterConfig.class); + int helixInstanceCapacity = 10000; + int helixResourceCapacityWeight = 100; + + when(mockClusterConfig.getControllerResourceInstanceGroupTag()).thenReturn("GENERAL"); + when(mockMultiClusterConfigs.getControllerConfig(clusterName)).thenReturn(mockClusterConfig); + + when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); + when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); + when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(1); + when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(1); + when(mockCommonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled()).thenReturn(false); + when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); + when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); + when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); + + doAnswer(invocation -> { + String controllerClusterName = invocation.getArgument(0); + ClusterConfig clusterConfig = invocation.getArgument(1); + + assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); + Map globalRebalancePreference = + clusterConfig.getGlobalRebalancePreference(); + assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), 1); + assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), 1); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + 0); + + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + assertEquals((int) defaultInstanceCapacityMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixInstanceCapacity); + + Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + assertEquals( + (int) defaultPartitionWeightMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixResourceCapacityWeight); + return null; + }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); + + doCallRealMethod().when(zkHelixAdminClient).createVeniceControllerCluster(); + zkHelixAdminClient.createVeniceControllerCluster(); + } } From ff56aec3d321e8eb0fc4decf8f644481d22dddbe Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 09:32:21 -0800 Subject: [PATCH 04/16] Fix fix unit tests --- .../controller/TestZkHelixAdminClient.java | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index 0a908870ba8..da70bc273f8 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -21,11 +21,11 @@ import java.security.AccessController; import java.security.PrivilegedAction; import java.util.Map; +import org.apache.helix.ConfigAccessor; import org.apache.helix.HelixAdmin; import org.apache.helix.manager.zk.ZKHelixManager; import org.apache.helix.model.CloudConfig; import org.apache.helix.model.ClusterConfig; -import org.apache.helix.model.HelixConfigScope; import org.apache.helix.model.IdealState; import org.apache.helix.model.RESTConfig; import org.testng.annotations.BeforeMethod; @@ -35,6 +35,7 @@ public class TestZkHelixAdminClient { private ZkHelixAdminClient zkHelixAdminClient; private HelixAdmin mockHelixAdmin; + private ConfigAccessor mockHelixConfigAccessor; private VeniceControllerMultiClusterConfig mockMultiClusterConfigs; private VeniceControllerClusterConfig mockCommonConfig; private static final String VENICE_CONTROLLER_CLUSTER = "venice-controller-cluster"; @@ -43,6 +44,7 @@ public class TestZkHelixAdminClient { public void setUp() throws NoSuchFieldException, IllegalAccessException { zkHelixAdminClient = mock(ZkHelixAdminClient.class); mockHelixAdmin = mock(HelixAdmin.class); + mockHelixConfigAccessor = mock(ConfigAccessor.class); mockMultiClusterConfigs = mock(VeniceControllerMultiClusterConfig.class); mockCommonConfig = mock(VeniceControllerClusterConfig.class); @@ -52,6 +54,10 @@ public void setUp() throws NoSuchFieldException, IllegalAccessException { helixAdminField.setAccessible(true); helixAdminField.set(zkHelixAdminClient, mockHelixAdmin); + Field helixConfigAccessorField = ZkHelixAdminClient.class.getDeclaredField("helixConfigAccessor"); + helixConfigAccessorField.setAccessible(true); + helixConfigAccessorField.set(zkHelixAdminClient, mockHelixConfigAccessor); + Field multiClusterConfigsField = ZkHelixAdminClient.class.getDeclaredField("multiClusterConfigs"); multiClusterConfigsField.setAccessible(true); multiClusterConfigsField.set(zkHelixAdminClient, mockMultiClusterConfigs); @@ -171,27 +177,29 @@ public void testUpdateClusterConfigs() { clusterConfig.setFaultZoneType(HelixUtils.TOPOLOGY_CONSTRAINT); doAnswer(invocation -> { - HelixConfigScope scope = invocation.getArgument(0); - Map clusterProps = invocation.getArgument(1); - - assertEquals(scope.getType(), HelixConfigScope.ConfigScopeProperty.CLUSTER); - assertEquals(scope.getClusterName(), clusterName); - assertEquals(clusterProps.size(), 6); - assertEquals(clusterProps.get(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN), "true"); - assertEquals(clusterProps.get(ClusterConfig.ClusterConfigProperty.DELAY_REBALANCE_ENABLED.name()), "true"); - assertEquals(clusterProps.get(ClusterConfig.ClusterConfigProperty.DELAY_REBALANCE_TIME.name()), "1000"); + String clusterNameProp = invocation.getArgument(0); + ClusterConfig clusterProps = invocation.getArgument(1); + + assertEquals(clusterNameProp, clusterName); + assertEquals(clusterProps.getClusterName(), clusterName); + Map simpleFields = clusterProps.getRecord().getSimpleFields(); + + assertEquals(simpleFields.size(), 6); + assertEquals(simpleFields.get(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN), "true"); + assertEquals(simpleFields.get(ClusterConfig.ClusterConfigProperty.DELAY_REBALANCE_ENABLED.name()), "true"); + assertEquals(simpleFields.get(ClusterConfig.ClusterConfigProperty.DELAY_REBALANCE_TIME.name()), "1000"); assertEquals( - clusterProps.get(ClusterConfig.ClusterConfigProperty.PERSIST_BEST_POSSIBLE_ASSIGNMENT.name()), + simpleFields.get(ClusterConfig.ClusterConfigProperty.PERSIST_BEST_POSSIBLE_ASSIGNMENT.name()), "true"); assertEquals( - clusterProps.get(ClusterConfig.ClusterConfigProperty.TOPOLOGY.name()), + simpleFields.get(ClusterConfig.ClusterConfigProperty.TOPOLOGY.name()), "/" + HelixUtils.TOPOLOGY_CONSTRAINT); assertEquals( - clusterProps.get(ClusterConfig.ClusterConfigProperty.FAULT_ZONE_TYPE.name()), + simpleFields.get(ClusterConfig.ClusterConfigProperty.FAULT_ZONE_TYPE.name()), HelixUtils.TOPOLOGY_CONSTRAINT); return null; - }).when(mockHelixAdmin).setConfig(any(), any()); + }).when(mockHelixConfigAccessor).setClusterConfig(any(), any()); zkHelixAdminClient.updateClusterConfigs(clusterName, clusterConfig); } @@ -208,17 +216,18 @@ public void testUpdateRESTConfigs() { restConfig.getRecord().setSimpleField("FIELD1", "VALUE1"); doAnswer(invocation -> { - HelixConfigScope scope = invocation.getArgument(0); - Map restProps = invocation.getArgument(1); + String clusterNameProp = invocation.getArgument(0); + ClusterConfig restProps = invocation.getArgument(1); + Map simpleFields = restProps.getRecord().getSimpleFields(); - assertEquals(scope.getType(), HelixConfigScope.ConfigScopeProperty.REST); - assertEquals(scope.getClusterName(), clusterName); - assertEquals(restProps.size(), 2); - assertEquals(restProps.get(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL.name()), restUrl); - assertEquals(restProps.get("FIELD1"), "VALUE1"); + assertEquals(clusterNameProp, clusterName); + assertEquals(restProps.getClusterName(), clusterName); + assertEquals(simpleFields.size(), 2); + assertEquals(simpleFields.get(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL.name()), restUrl); + assertEquals(simpleFields.get("FIELD1"), "VALUE1"); return null; - }).when(mockHelixAdmin).setConfig(any(), any()); + }).when(mockHelixConfigAccessor).setRESTConfig(any(), any()); zkHelixAdminClient.updateRESTConfigs(clusterName, restConfig); } From 22fa515cfb08eb223190cd89aca368df2b38b313 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 09:39:54 -0800 Subject: [PATCH 05/16] Fix fix unit tests --- .../com/linkedin/venice/controller/TestZkHelixAdminClient.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index da70bc273f8..ce652998963 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -217,11 +217,10 @@ public void testUpdateRESTConfigs() { doAnswer(invocation -> { String clusterNameProp = invocation.getArgument(0); - ClusterConfig restProps = invocation.getArgument(1); + RESTConfig restProps = invocation.getArgument(1); Map simpleFields = restProps.getRecord().getSimpleFields(); assertEquals(clusterNameProp, clusterName); - assertEquals(restProps.getClusterName(), clusterName); assertEquals(simpleFields.size(), 2); assertEquals(simpleFields.get(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL.name()), restUrl); assertEquals(simpleFields.get("FIELD1"), "VALUE1"); From ac644df3a3678b90afc64860f9301130c84f3183 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 15:49:34 -0800 Subject: [PATCH 06/16] Address review comments --- .../com/linkedin/venice/ConfigConstants.java | 2 +- .../java/com/linkedin/venice/ConfigKeys.java | 4 +- .../venice/controller/TestHAASController.java | 8 ++- .../VeniceControllerClusterConfig.java | 4 +- .../venice/controller/VeniceHelixAdmin.java | 2 +- .../venice/controller/ZkHelixAdminClient.java | 32 +++++---- .../controller/TestZkHelixAdminClient.java | 72 +++++++++++++++++-- 7 files changed, 94 insertions(+), 30 deletions(-) diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java index 5bce58bf47a..0f076f015ec 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigConstants.java @@ -20,7 +20,7 @@ public class ConfigConstants { public static final long DEFAULT_PUSH_STATUS_STORE_HEARTBEAT_EXPIRATION_TIME_IN_SECONDS = TimeUnit.MINUTES.toSeconds(10); - public static final String DEFAULT_HELIX_RESOURCE_CAPACITY_KEY = "cluster_resource_weight"; + public static final String CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY = "cluster_resource_weight"; /** * End of controller config default value diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java index 5686369872b..a6b2ab45d3c 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java @@ -2422,13 +2422,13 @@ private ConfigKeys() { /** * Specifies the capacity a controller instance can handle, determined by - * {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. Default is 10000. + * {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_INSTANCE_CAPACITY = "controller.helix.instance.capacity"; /** * Specifies the weight of each Helix resource. The maximum weight per instance is determined by - * {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. Default is 100. + * {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT = "controller.helix.default.instance.capacity"; } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java index 8f2569cfbec..cc09fb13b48 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java @@ -1,6 +1,6 @@ package com.linkedin.venice.controller; -import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; +import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static com.linkedin.venice.utils.TestUtils.assertCommand; import static com.linkedin.venice.utils.TestUtils.shutdownExecutor; import static com.linkedin.venice.utils.TestUtils.waitForNonDeterministicAssertion; @@ -305,11 +305,13 @@ public void testRebalancePreferenceAndCapacityKeys() { 1); Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); - assertEquals((int) defaultInstanceCapacityMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixInstanceCapacity); + assertEquals( + (int) defaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixInstanceCapacity); Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); assertEquals( - (int) defaultPartitionWeightMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + (int) defaultPartitionWeightMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixResourceCapacityWeight); } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java index a66762925dd..8591ad2931f 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java @@ -1012,8 +1012,8 @@ public VeniceControllerClusterConfig(VeniceProperties props) { this.helixRebalancePreferenceLessMovement = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1); this.helixRebalancePreferenceEnableForceBaselineConverge = props.getBoolean(CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE, false); - this.helixInstanceCapacity = props.getInt(CONTROLLER_HELIX_INSTANCE_CAPACITY, 10000); - this.helixResourceCapacityWeight = props.getInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, 100); + this.helixInstanceCapacity = props.getInt(CONTROLLER_HELIX_INSTANCE_CAPACITY, -1); + this.helixResourceCapacityWeight = props.getInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, -1); } public VeniceProperties getProps() { diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java index 5e185fc357b..9999011f53b 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java @@ -8778,7 +8778,7 @@ public PubSubTopicRepository getPubSubTopicRepository() { return pubSubTopicRepository; } - public SafeHelixManager getHelixManager() { + protected SafeHelixManager getHelixManager() { return helixManager; } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index 0186be380c5..392b63aa449 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -1,6 +1,6 @@ package com.linkedin.venice.controller; -import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; +import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.exceptions.VeniceRetriableException; @@ -120,21 +120,25 @@ public void createVeniceControllerCluster() { clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); List instanceCapacityKeys = new ArrayList<>(); - instanceCapacityKeys.add(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); + instanceCapacityKeys.add(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); - // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high - // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes the - // current top-state distribution into account when rebalancing. - Map defaultInstanceCapacityMap = new HashMap<>(); - defaultInstanceCapacityMap.put(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixInstanceCapacity()); - clusterConfig.setDefaultInstanceCapacityMap(defaultInstanceCapacityMap); - - // This is how much weight each resource in a cluster has - Map defaultPartitionWeightMap = new HashMap<>(); - defaultPartitionWeightMap - .put(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixResourceCapacityWeight()); - clusterConfig.setDefaultPartitionWeightMap(defaultPartitionWeightMap); + if (commonConfig.getHelixInstanceCapacity() > 0 && commonConfig.getHelixResourceCapacityWeight() > 0) { + // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high + // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes + // the + // current top-state distribution into account when rebalancing. + Map defaultInstanceCapacityMap = new HashMap<>(); + defaultInstanceCapacityMap + .put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixInstanceCapacity()); + clusterConfig.setDefaultInstanceCapacityMap(defaultInstanceCapacityMap); + + // This is how much weight each resource in a cluster has + Map defaultPartitionWeightMap = new HashMap<>(); + defaultPartitionWeightMap + .put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixResourceCapacityWeight()); + clusterConfig.setDefaultPartitionWeightMap(defaultPartitionWeightMap); + } updateClusterConfigs(controllerClusterName, clusterConfig); helixAdmin.addStateModelDef(controllerClusterName, LeaderStandbySMD.name, LeaderStandbySMD.build()); diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index ce652998963..b1b4ab86790 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -1,6 +1,6 @@ package com.linkedin.venice.controller; -import static com.linkedin.venice.ConfigConstants.DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; +import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyString; @@ -235,6 +235,8 @@ public void testUpdateRESTConfigs() { public void testRebalancePreferenceAndCapacityKeys() { String clusterName = "test-cluster"; VeniceControllerClusterConfig mockClusterConfig = mock(VeniceControllerClusterConfig.class); + int helixRebalancePreferenceEvenness = 10; + int helixRebalancePreferenceLessMovement = 1; int helixInstanceCapacity = 10000; int helixResourceCapacityWeight = 100; @@ -243,8 +245,8 @@ public void testRebalancePreferenceAndCapacityKeys() { when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); - when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(1); - when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(1); + when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); + when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); when(mockCommonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled()).thenReturn(false); when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); @@ -257,18 +259,24 @@ public void testRebalancePreferenceAndCapacityKeys() { assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); Map globalRebalancePreference = clusterConfig.getGlobalRebalancePreference(); - assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), 1); - assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), 1); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), + helixRebalancePreferenceEvenness); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), + helixRebalancePreferenceLessMovement); assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), 0); Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); - assertEquals((int) defaultInstanceCapacityMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixInstanceCapacity); + assertEquals( + (int) defaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixInstanceCapacity); Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); assertEquals( - (int) defaultPartitionWeightMap.get(DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + (int) defaultPartitionWeightMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixResourceCapacityWeight); return null; }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); @@ -276,4 +284,54 @@ public void testRebalancePreferenceAndCapacityKeys() { doCallRealMethod().when(zkHelixAdminClient).createVeniceControllerCluster(); zkHelixAdminClient.createVeniceControllerCluster(); } + + @Test + public void testDisabledCapacityKeys() { + String clusterName = "test-cluster"; + VeniceControllerClusterConfig mockClusterConfig = mock(VeniceControllerClusterConfig.class); + int helixRebalancePreferenceEvenness = 1; + int helixRebalancePreferenceLessMovement = 1; + int helixInstanceCapacity = -1; + int helixResourceCapacityWeight = -1; + + when(mockClusterConfig.getControllerResourceInstanceGroupTag()).thenReturn("GENERAL"); + when(mockMultiClusterConfigs.getControllerConfig(clusterName)).thenReturn(mockClusterConfig); + + when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); + when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); + when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); + when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); + when(mockCommonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled()).thenReturn(false); + when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); + when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); + when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); + + doAnswer(invocation -> { + String controllerClusterName = invocation.getArgument(0); + ClusterConfig clusterConfig = invocation.getArgument(1); + + assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); + Map globalRebalancePreference = + clusterConfig.getGlobalRebalancePreference(); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), + helixRebalancePreferenceEvenness); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), + helixRebalancePreferenceLessMovement); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + 0); + + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + assertEquals(defaultInstanceCapacityMap.size(), 0); + + Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + assertEquals(defaultPartitionWeightMap.size(), 0); + return null; + }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); + + doCallRealMethod().when(zkHelixAdminClient).createVeniceControllerCluster(); + zkHelixAdminClient.createVeniceControllerCluster(); + } } From a28c0d6134327766110e07fec0d3cadb4500e6d1 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 15:54:28 -0800 Subject: [PATCH 07/16] Fix syntax --- .../java/com/linkedin/venice/controller/VeniceHelixAdmin.java | 1 + 1 file changed, 1 insertion(+) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java index f99e71da621..57f2859fbef 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java @@ -8837,6 +8837,7 @@ public PubSubTopicRepository getPubSubTopicRepository() { protected SafeHelixManager getHelixManager() { return helixManager; + } String getPushJobStatusStoreClusterName() { return pushJobStatusStoreClusterName; From 5ce55ec64b75ef31c63e3e953c227fe8f020e4a2 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 20:56:23 -0800 Subject: [PATCH 08/16] Move capacity key setting into if statement --- .../linkedin/venice/controller/ZkHelixAdminClient.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index 392b63aa449..b58555a021d 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -119,11 +119,11 @@ public void createVeniceControllerCluster() { .put(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, forceBaseLineConverge); clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); - List instanceCapacityKeys = new ArrayList<>(); - instanceCapacityKeys.add(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); - clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); - if (commonConfig.getHelixInstanceCapacity() > 0 && commonConfig.getHelixResourceCapacityWeight() > 0) { + List instanceCapacityKeys = new ArrayList<>(); + instanceCapacityKeys.add(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); + clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); + // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes // the From 9052cd747035e66136b0dcd7468f7c0979e6e6e5 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Wed, 29 Jan 2025 21:00:56 -0800 Subject: [PATCH 09/16] Add more test assertions --- .../com/linkedin/venice/controller/TestHAASController.java | 3 +++ .../linkedin/venice/controller/TestZkHelixAdminClient.java | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java index cc09fb13b48..34b5f6b1534 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java @@ -304,6 +304,9 @@ public void testRebalancePreferenceAndCapacityKeys() { (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), 1); + List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + assertEquals(instanceCapacityKeys.size(), 1); + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); assertEquals( (int) defaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index b1b4ab86790..4057be62dd3 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -20,6 +20,7 @@ import java.lang.reflect.Field; import java.security.AccessController; import java.security.PrivilegedAction; +import java.util.List; import java.util.Map; import org.apache.helix.ConfigAccessor; import org.apache.helix.HelixAdmin; @@ -269,6 +270,9 @@ public void testRebalancePreferenceAndCapacityKeys() { (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), 0); + List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + assertEquals(instanceCapacityKeys.size(), 1); + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); assertEquals( (int) defaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), @@ -323,6 +327,9 @@ public void testDisabledCapacityKeys() { (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), 0); + List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + assertEquals(instanceCapacityKeys.size(), 0); + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); assertEquals(defaultInstanceCapacityMap.size(), 0); From 5f7826a772ca1aeeb7e7ca5354fc32ac8a322c0f Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Thu, 30 Jan 2025 11:31:02 -0800 Subject: [PATCH 10/16] Address review comments --- .../java/com/linkedin/venice/ConfigKeys.java | 23 +++--- .../venice/controller/TestHAASController.java | 8 +- .../VeniceControllerClusterConfig.java | 16 ++-- .../VeniceControllerMultiClusterConfig.java | 20 ----- .../venice/controller/ZkHelixAdminClient.java | 39 +++++---- .../controller/TestZkHelixAdminClient.java | 81 ++++++++++++++----- 6 files changed, 108 insertions(+), 79 deletions(-) diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java index a6b2ab45d3c..9adebceef79 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java @@ -2401,34 +2401,37 @@ private ConfigKeys() { /** * Specifies the value to use for Helix's rebalance preference for evenness when using Waged. - * The default value is 1. + * Default is -1 which will use Helix's default. */ public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS = "controller.helix.rebalance.preference.evenness"; /** * Specifies the value to use for Helix's rebalance preference for less movement when using Waged. - * The default value is 1. + * Default is -1 which will use Helix's default. */ public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT = "controller.helix.rebalance.preference.less.movement"; /** - * Indicates whether to enable force baseline convergence in Helix's rebalance preference when using Waged. - * Default is false. + * Specifies the value to use for Helix's rebalance preference for force baseline convergence when using Waged. + * This should always be turned off, so it doesn't overpower other constraints. + * Default is -1 which will use Helix's default. */ - public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE = - "controller.helix.rebalance.preference.enable.force.baseline.converge"; + public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE = + "controller.helix.rebalance.preference.force.baseline.converge"; /** - * Specifies the capacity a controller instance can handle, determined by - * {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. Default is -1 which indicates disabled. + * Specifies the capacity a controller instance can handle. + * The weight of each Helix resource is determined by {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. + * Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_INSTANCE_CAPACITY = "controller.helix.instance.capacity"; /** - * Specifies the weight of each Helix resource. The maximum weight per instance is determined by - * {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. Default is -1 which indicates disabled. + * Specifies the weight of each Helix resource. + * The maximum weight per instance is determined by {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. + * Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT = "controller.helix.default.instance.capacity"; } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java index 34b5f6b1534..8e039a2b2b9 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java @@ -268,7 +268,7 @@ public void testRebalancePreferenceAndCapacityKeys() { int helixRebalancePreferenceEvenness = 10; int helixRebalancePreferenceLessMovement = 2; - boolean helixRebalancePreferenceEnableForceBaselineConverge = true; + int helixRebalancePreferenceForceBaselineConverge = 1; int helixInstanceCapacity = 1000; int helixResourceCapacityWeight = 10; @@ -278,8 +278,8 @@ public void testRebalancePreferenceAndCapacityKeys() { clusterProperties .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); clusterProperties.put( - ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE, - helixRebalancePreferenceEnableForceBaselineConverge); + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); @@ -302,7 +302,7 @@ public void testRebalancePreferenceAndCapacityKeys() { helixRebalancePreferenceLessMovement); assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), - 1); + helixRebalancePreferenceForceBaselineConverge); List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 1); diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java index 8591ad2931f..63e96041264 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java @@ -58,8 +58,8 @@ import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_SOURCES; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_PROVIDER; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY; -import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS; +import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT; import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL; @@ -552,7 +552,7 @@ public class VeniceControllerClusterConfig { private final int helixRebalancePreferenceEvenness; private final int helixRebalancePreferenceLessMovement; - private final boolean helixRebalancePreferenceEnableForceBaselineConverge; + private final int helixRebalancePreferenceForceBaselineConverge; private final int helixInstanceCapacity; private final int helixResourceCapacityWeight; @@ -1008,10 +1008,10 @@ public VeniceControllerClusterConfig(VeniceProperties props) { this.isHybridStorePartitionCountUpdateEnabled = props.getBoolean(ConfigKeys.CONTROLLER_ENABLE_HYBRID_STORE_PARTITION_COUNT_UPDATE, false); - this.helixRebalancePreferenceEvenness = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, 1); - this.helixRebalancePreferenceLessMovement = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1); - this.helixRebalancePreferenceEnableForceBaselineConverge = - props.getBoolean(CONTROLLER_HELIX_REBALANCE_PREFERENCE_ENABLE_FORCE_BASELINE_CONVERGE, false); + this.helixRebalancePreferenceEvenness = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, -1); + this.helixRebalancePreferenceLessMovement = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, -1); + this.helixRebalancePreferenceForceBaselineConverge = + props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, -1); this.helixInstanceCapacity = props.getInt(CONTROLLER_HELIX_INSTANCE_CAPACITY, -1); this.helixResourceCapacityWeight = props.getInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, -1); } @@ -1860,8 +1860,8 @@ public int getHelixRebalancePreferenceLessMovement() { return helixRebalancePreferenceLessMovement; } - public boolean isHelixRebalancePreferenceForceBaselineConvergeEnabled() { - return helixRebalancePreferenceEnableForceBaselineConverge; + public int getHelixRebalancePreferenceForceBaselineConverge() { + return helixRebalancePreferenceForceBaselineConverge; } public int getHelixInstanceCapacity() { diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java index 6ee6e99f7ae..0de71246bae 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerMultiClusterConfig.java @@ -302,24 +302,4 @@ public long getServiceDiscoveryRegistrationRetryMS() { public List getControllerInstanceTagList() { return getCommonConfig().getControllerInstanceTagList(); } - - public int getHelixRebalancePreferenceEvenness() { - return getCommonConfig().getHelixRebalancePreferenceEvenness(); - } - - public int getHelixRebalancePreferenceLessMovement() { - return getCommonConfig().getHelixRebalancePreferenceLessMovement(); - } - - public boolean isHelixRebalancePreferenceForceBaselineConvergeEnabled() { - return getCommonConfig().isHelixRebalancePreferenceForceBaselineConvergeEnabled(); - } - - public int getHelixInstanceCapacity() { - return getCommonConfig().getHelixInstanceCapacity(); - } - - public int getHelixResourceCapacityWeight() { - return getCommonConfig().getHelixResourceCapacityWeight(); - } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index b58555a021d..fa10d332f26 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -107,27 +107,36 @@ public void createVeniceControllerCluster() { // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost // of rebalancing for the controller is cheap as it is stateless. Map globalRebalancePreference = new HashMap<>(); - globalRebalancePreference.put( - ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, - commonConfig.getHelixRebalancePreferenceEvenness()); - globalRebalancePreference.put( - ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, - commonConfig.getHelixRebalancePreferenceLessMovement()); - // This should be turned off, so it doesn't overpower other constraint calculations - int forceBaseLineConverge = commonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled() ? 1 : 0; - globalRebalancePreference - .put(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, forceBaseLineConverge); - clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); - - if (commonConfig.getHelixInstanceCapacity() > 0 && commonConfig.getHelixResourceCapacityWeight() > 0) { + + if (commonConfig.getHelixRebalancePreferenceEvenness() > -1 + && commonConfig.getHelixRebalancePreferenceLessMovement() > -1) { + // EVENNESS and LESS_MOVEMENT need to be defined together + globalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, + commonConfig.getHelixRebalancePreferenceEvenness()); + globalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, + commonConfig.getHelixRebalancePreferenceLessMovement()); + } + + if (commonConfig.getHelixRebalancePreferenceForceBaselineConverge() > -1) { + globalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, + commonConfig.getHelixRebalancePreferenceForceBaselineConverge()); + } + + if (!globalRebalancePreference.isEmpty()) { + clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); + } + + if (commonConfig.getHelixInstanceCapacity() > -1 && commonConfig.getHelixResourceCapacityWeight() > -1) { List instanceCapacityKeys = new ArrayList<>(); instanceCapacityKeys.add(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes - // the - // current top-state distribution into account when rebalancing. + // the current top-state distribution into account when rebalancing. Map defaultInstanceCapacityMap = new HashMap<>(); defaultInstanceCapacityMap .put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixInstanceCapacity()); diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index 4057be62dd3..ce7b38c3881 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -234,21 +234,18 @@ public void testUpdateRESTConfigs() { @Test public void testRebalancePreferenceAndCapacityKeys() { - String clusterName = "test-cluster"; - VeniceControllerClusterConfig mockClusterConfig = mock(VeniceControllerClusterConfig.class); int helixRebalancePreferenceEvenness = 10; int helixRebalancePreferenceLessMovement = 1; + int helixRebalancePreferenceForceBaselineConverge = 1; int helixInstanceCapacity = 10000; int helixResourceCapacityWeight = 100; - when(mockClusterConfig.getControllerResourceInstanceGroupTag()).thenReturn("GENERAL"); - when(mockMultiClusterConfigs.getControllerConfig(clusterName)).thenReturn(mockClusterConfig); - when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); - when(mockCommonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled()).thenReturn(false); + when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) + .thenReturn(helixRebalancePreferenceForceBaselineConverge); when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); @@ -268,7 +265,7 @@ public void testRebalancePreferenceAndCapacityKeys() { helixRebalancePreferenceLessMovement); assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), - 0); + helixRebalancePreferenceForceBaselineConverge); List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 1); @@ -290,22 +287,63 @@ public void testRebalancePreferenceAndCapacityKeys() { } @Test - public void testDisabledCapacityKeys() { - String clusterName = "test-cluster"; - VeniceControllerClusterConfig mockClusterConfig = mock(VeniceControllerClusterConfig.class); - int helixRebalancePreferenceEvenness = 1; - int helixRebalancePreferenceLessMovement = 1; + public void testUndefinedRebalancePreferenceAndCapacityKeys() { + int helixRebalancePreferenceEvenness = -1; + int helixRebalancePreferenceLessMovement = -1; + int helixRebalancePreferenceForceBaselineConverge = -1; int helixInstanceCapacity = -1; int helixResourceCapacityWeight = -1; - when(mockClusterConfig.getControllerResourceInstanceGroupTag()).thenReturn("GENERAL"); - when(mockMultiClusterConfigs.getControllerConfig(clusterName)).thenReturn(mockClusterConfig); + when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); + when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); + when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); + when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); + when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) + .thenReturn(helixRebalancePreferenceForceBaselineConverge); + when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); + when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); + when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); + + doAnswer(invocation -> { + String controllerClusterName = invocation.getArgument(0); + ClusterConfig clusterConfig = invocation.getArgument(1); + + assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); + + // When you don't specify rebalance preferences, it will use Helix's default settings + Map globalRebalancePreference = + clusterConfig.getGlobalRebalancePreference(); + assertTrue(globalRebalancePreference.isEmpty()); + + List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + assertEquals(instanceCapacityKeys.size(), 0); + + Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + assertEquals(defaultInstanceCapacityMap.size(), 0); + + Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + assertEquals(defaultPartitionWeightMap.size(), 0); + return null; + }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); + + doCallRealMethod().when(zkHelixAdminClient).createVeniceControllerCluster(); + zkHelixAdminClient.createVeniceControllerCluster(); + } + + @Test + public void testPartiallyDefinedRebalancePreference() { + int helixRebalancePreferenceEvenness = 1; + int helixRebalancePreferenceLessMovement = -1; + int helixRebalancePreferenceForceBaselineConverge = 1; + int helixInstanceCapacity = -1; + int helixResourceCapacityWeight = -1; when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); - when(mockCommonConfig.isHelixRebalancePreferenceForceBaselineConvergeEnabled()).thenReturn(false); + when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) + .thenReturn(helixRebalancePreferenceForceBaselineConverge); when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); @@ -315,17 +353,16 @@ public void testDisabledCapacityKeys() { ClusterConfig clusterConfig = invocation.getArgument(1); assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); + + // EVENNESS and LESS_MOVEMENT must be defined together. If not it will use Helix's default settings Map globalRebalancePreference = clusterConfig.getGlobalRebalancePreference(); - assertEquals( - (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), - helixRebalancePreferenceEvenness); - assertEquals( - (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), - helixRebalancePreferenceLessMovement); + assertFalse(globalRebalancePreference.containsKey(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS)); + assertFalse(globalRebalancePreference.containsKey(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT)); + // FORCE_BASELINE_CONVERGE can be defined without setting EVENNESS and LESS_MOVEMENT assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), - 0); + helixRebalancePreferenceForceBaselineConverge); List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 0); From 74cdc53ef5a5b8ed419f82985f0d232d693edd1f Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Thu, 30 Jan 2025 11:45:04 -0800 Subject: [PATCH 11/16] Fix unit test --- .../linkedin/venice/controller/TestZkHelixAdminClient.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index ce7b38c3881..866230e7d9e 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -313,7 +313,11 @@ public void testUndefinedRebalancePreferenceAndCapacityKeys() { // When you don't specify rebalance preferences, it will use Helix's default settings Map globalRebalancePreference = clusterConfig.getGlobalRebalancePreference(); - assertTrue(globalRebalancePreference.isEmpty()); + assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), 1); + assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), 1); + assertEquals( + (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + 0); List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 0); From 0b9a2920605f806a01f3768ec7bbf4bde5b76725 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Fri, 31 Jan 2025 16:31:36 -0800 Subject: [PATCH 12/16] Address review comments --- .../java/com/linkedin/venice/ConfigKeys.java | 12 +- .../VeniceControllerClusterConfig.java | 148 ++++++++++++--- .../venice/controller/VeniceHelixAdmin.java | 2 +- .../venice/controller/ZkHelixAdminClient.java | 50 +---- .../TestVeniceControllerClusterConfig.java | 174 ++++++++++++++++++ .../controller/TestZkHelixAdminClient.java | 96 +++++----- 6 files changed, 365 insertions(+), 117 deletions(-) diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java index 9adebceef79..0c481d8700d 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java @@ -2401,22 +2401,24 @@ private ConfigKeys() { /** * Specifies the value to use for Helix's rebalance preference for evenness when using Waged. - * Default is -1 which will use Helix's default. + * Must be used in conjunction with {@link ConfigKeys#CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT}. + * Accepted range: 0 - 1000 */ public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS = "controller.helix.rebalance.preference.evenness"; /** * Specifies the value to use for Helix's rebalance preference for less movement when using Waged. - * Default is -1 which will use Helix's default. + * Must be used in conjunction with {@link ConfigKeys#CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS}. + * Accepted range: 0 - 1000 */ public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT = "controller.helix.rebalance.preference.less.movement"; /** * Specifies the value to use for Helix's rebalance preference for force baseline convergence when using Waged. - * This should always be turned off, so it doesn't overpower other constraints. - * Default is -1 which will use Helix's default. + * This shouldn't be enabled, so it doesn't overpower other constraints. + * Accepted range: 0 - 1000 */ public static final String CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE = "controller.helix.rebalance.preference.force.baseline.converge"; @@ -2424,14 +2426,12 @@ private ConfigKeys() { /** * Specifies the capacity a controller instance can handle. * The weight of each Helix resource is determined by {@link ConfigKeys#CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT}. - * Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_INSTANCE_CAPACITY = "controller.helix.instance.capacity"; /** * Specifies the weight of each Helix resource. * The maximum weight per instance is determined by {@link ConfigKeys#CONTROLLER_HELIX_INSTANCE_CAPACITY}. - * Default is -1 which indicates disabled. */ public static final String CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT = "controller.helix.default.instance.capacity"; } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java index 63e96041264..c21f6354f16 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java @@ -1,6 +1,7 @@ package com.linkedin.venice.controller; import static com.linkedin.venice.CommonConfigKeys.SSL_FACTORY_CLASS_NAME; +import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static com.linkedin.venice.ConfigConstants.DEFAULT_MAX_RECORD_SIZE_BYTES_BACKFILL; import static com.linkedin.venice.ConfigConstants.DEFAULT_PUSH_STATUS_STORE_HEARTBEAT_EXPIRATION_TIME_IN_SECONDS; import static com.linkedin.venice.ConfigKeys.ACTIVE_ACTIVE_REAL_TIME_SOURCE_FABRIC_LIST; @@ -225,6 +226,7 @@ import org.apache.helix.cloud.constants.CloudProvider; import org.apache.helix.controller.rebalancer.strategy.CrushRebalanceStrategy; import org.apache.helix.model.CloudConfig; +import org.apache.helix.model.ClusterConfig; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -550,11 +552,10 @@ public class VeniceControllerClusterConfig { private Set pushJobUserErrorCheckpoints; private boolean isHybridStorePartitionCountUpdateEnabled; - private final int helixRebalancePreferenceEvenness; - private final int helixRebalancePreferenceLessMovement; - private final int helixRebalancePreferenceForceBaselineConverge; - private final int helixInstanceCapacity; - private final int helixResourceCapacityWeight; + private final Map helixGlobalRebalancePreference; + private final List helixInstanceCapacityKeys; + private final Map helixDefaultInstanceCapacityMap; + private final Map helixDefaultPartitionWeightMap; public VeniceControllerClusterConfig(VeniceProperties props) { this.props = props; @@ -1008,12 +1009,55 @@ public VeniceControllerClusterConfig(VeniceProperties props) { this.isHybridStorePartitionCountUpdateEnabled = props.getBoolean(ConfigKeys.CONTROLLER_ENABLE_HYBRID_STORE_PARTITION_COUNT_UPDATE, false); - this.helixRebalancePreferenceEvenness = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, -1); - this.helixRebalancePreferenceLessMovement = props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, -1); - this.helixRebalancePreferenceForceBaselineConverge = - props.getInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, -1); - this.helixInstanceCapacity = props.getInt(CONTROLLER_HELIX_INSTANCE_CAPACITY, -1); - this.helixResourceCapacityWeight = props.getInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, -1); + Integer helixRebalancePreferenceEvenness = + props.getOptionalInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS).orElse(null); + Integer helixRebalancePreferenceLessMovement = + props.getOptionalInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT).orElse(null); + Integer helixRebalancePreferenceForceBaselineConverge = + props.getOptionalInt(CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE).orElse(null); + validateHelixRebalancePreferences( + helixRebalancePreferenceEvenness, + helixRebalancePreferenceLessMovement, + helixRebalancePreferenceForceBaselineConverge); + + if ((helixRebalancePreferenceEvenness != null && helixRebalancePreferenceLessMovement != null) + || helixRebalancePreferenceForceBaselineConverge != null) { + helixGlobalRebalancePreference = new HashMap<>(); + } else { + helixGlobalRebalancePreference = null; + } + + if (helixRebalancePreferenceEvenness != null && helixRebalancePreferenceLessMovement != null) { + // EVENNESS and LESS_MOVEMENT need to be defined together + helixGlobalRebalancePreference + .put(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, helixRebalancePreferenceEvenness); + helixGlobalRebalancePreference + .put(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + } + + if (helixRebalancePreferenceForceBaselineConverge != null) { + helixGlobalRebalancePreference.put( + ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + } + + Integer helixInstanceCapacity = props.getOptionalInt(CONTROLLER_HELIX_INSTANCE_CAPACITY).orElse(null); + Integer helixResourceCapacityWeight = props.getOptionalInt(CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT).orElse(null); + validateHelixCapacities(helixInstanceCapacity, helixResourceCapacityWeight); + + if (helixInstanceCapacity != null && helixResourceCapacityWeight != null) { + helixInstanceCapacityKeys = Collections.singletonList(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); + helixDefaultInstanceCapacityMap = new HashMap<>(); + helixDefaultPartitionWeightMap = new HashMap<>(); + + helixDefaultInstanceCapacityMap.put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, helixInstanceCapacity); + helixDefaultPartitionWeightMap.put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, helixResourceCapacityWeight); + + } else { + helixInstanceCapacityKeys = null; + helixDefaultInstanceCapacityMap = null; + helixDefaultPartitionWeightMap = null; + } } public VeniceProperties getProps() { @@ -1852,23 +1896,85 @@ public Set getPushJobUserErrorCheckpoints() { return pushJobUserErrorCheckpoints; } - public int getHelixRebalancePreferenceEvenness() { - return helixRebalancePreferenceEvenness; + public Map getHelixGlobalRebalancePreference() { + return helixGlobalRebalancePreference; + } + + public List getHelixInstanceCapacityKeys() { + return helixInstanceCapacityKeys; + } + + public Map getHelixDefaultInstanceCapacityMap() { + return helixDefaultInstanceCapacityMap; } - public int getHelixRebalancePreferenceLessMovement() { - return helixRebalancePreferenceLessMovement; + public Map getHelixDefaultPartitionWeightMap() { + return helixDefaultPartitionWeightMap; } - public int getHelixRebalancePreferenceForceBaselineConverge() { - return helixRebalancePreferenceForceBaselineConverge; + private void validateHelixRebalancePreferences( + Integer helixRebalancePreferenceEvenness, + Integer helixRebalancePreferenceLessMovement, + Integer helixRebalancePreferenceForceBaselineConverge) { + if (helixRebalancePreferenceEvenness == null && helixRebalancePreferenceLessMovement == null + && helixRebalancePreferenceForceBaselineConverge == null) { + return; + } + + if ((helixRebalancePreferenceEvenness == null && helixRebalancePreferenceLessMovement != null) + || (helixRebalancePreferenceEvenness != null && helixRebalancePreferenceLessMovement == null)) { + throw new ConfigurationException( + CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS + " and " + CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT + + " must be defined together."); + } + + validateHelixRebalancePreferenceRange( + helixRebalancePreferenceEvenness, + CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS); + validateHelixRebalancePreferenceRange( + helixRebalancePreferenceLessMovement, + CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT); + validateHelixRebalancePreferenceRange( + helixRebalancePreferenceForceBaselineConverge, + CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE); } - public int getHelixInstanceCapacity() { - return helixInstanceCapacity; + private void validateHelixRebalancePreferenceRange(Integer value, String rebalancePreferenceName) { + if (value == null) { + return; + } + + int MIN_HELIX_REBALANCE_PREFERENCE = 0; + int MAX_HELIX_REBALANCE_PREFERENCE = 1000; + if (value < MIN_HELIX_REBALANCE_PREFERENCE || value > MAX_HELIX_REBALANCE_PREFERENCE) { + throw new ConfigurationException( + rebalancePreferenceName + " must be in the range between " + MIN_HELIX_REBALANCE_PREFERENCE + " and " + + MAX_HELIX_REBALANCE_PREFERENCE); + } } - public int getHelixResourceCapacityWeight() { - return helixResourceCapacityWeight; + private void validateHelixCapacities(Integer helixInstanceCapacity, Integer helixResourceCapacityWeight) { + if ((helixInstanceCapacity != null && helixResourceCapacityWeight == null) + || (helixInstanceCapacity == null && helixResourceCapacityWeight != null)) { + throw new ConfigurationException( + CONTROLLER_HELIX_INSTANCE_CAPACITY + " and " + CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT + + " must be defined together"); + } + + // Both are null, no further validation needed + if (helixInstanceCapacity == null) { + return; + } + + if (helixInstanceCapacity <= 0 || helixResourceCapacityWeight <= 0) { + throw new ConfigurationException( + CONTROLLER_HELIX_INSTANCE_CAPACITY + " and " + CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT + + " must both be greater than 0"); + } + + if (helixInstanceCapacity < helixResourceCapacityWeight) { + throw new ConfigurationException( + CONTROLLER_HELIX_INSTANCE_CAPACITY + " cannot be < " + CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT); + } } } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java index 57f2859fbef..860c6bb6c86 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java @@ -8835,7 +8835,7 @@ public PubSubTopicRepository getPubSubTopicRepository() { return pubSubTopicRepository; } - protected SafeHelixManager getHelixManager() { + SafeHelixManager getHelixManager() { return helixManager; } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index fa10d332f26..f9f93636597 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -1,7 +1,5 @@ package com.linkedin.venice.controller; -import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; - import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.exceptions.VeniceRetriableException; import com.linkedin.venice.helix.ZkClientFactory; @@ -9,9 +7,7 @@ import com.linkedin.venice.utils.RetryUtils; import io.tehuti.metrics.MetricsRepository; import java.time.Duration; -import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -104,49 +100,21 @@ public void createVeniceControllerCluster() { clusterConfig.setTopologyAwareEnabled(false); clusterConfig.setPersistBestPossibleAssignment(true); - // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost - // of rebalancing for the controller is cheap as it is stateless. - Map globalRebalancePreference = new HashMap<>(); - - if (commonConfig.getHelixRebalancePreferenceEvenness() > -1 - && commonConfig.getHelixRebalancePreferenceLessMovement() > -1) { - // EVENNESS and LESS_MOVEMENT need to be defined together - globalRebalancePreference.put( - ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, - commonConfig.getHelixRebalancePreferenceEvenness()); - globalRebalancePreference.put( - ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, - commonConfig.getHelixRebalancePreferenceLessMovement()); - } - - if (commonConfig.getHelixRebalancePreferenceForceBaselineConverge() > -1) { - globalRebalancePreference.put( - ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE, - commonConfig.getHelixRebalancePreferenceForceBaselineConverge()); - } - - if (!globalRebalancePreference.isEmpty()) { - clusterConfig.setGlobalRebalancePreference(globalRebalancePreference); + if (!commonConfig.getHelixGlobalRebalancePreference().isEmpty()) { + // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost + // of rebalancing for the controller is cheap as it is stateless. + clusterConfig.setGlobalRebalancePreference(commonConfig.getHelixGlobalRebalancePreference()); } - if (commonConfig.getHelixInstanceCapacity() > -1 && commonConfig.getHelixResourceCapacityWeight() > -1) { - List instanceCapacityKeys = new ArrayList<>(); - instanceCapacityKeys.add(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); - clusterConfig.setInstanceCapacityKeys(instanceCapacityKeys); + if (commonConfig.getHelixDefaultInstanceCapacityMap() != null + && commonConfig.getHelixDefaultPartitionWeightMap() != null) { + clusterConfig.setInstanceCapacityKeys(commonConfig.getHelixInstanceCapacityKeys()); // This is how much capacity a participant can take. The Helix documentation recommends setting this to a high // value to avoid rebalance failures. The primary goal of setting this is to enable a constraint that takes // the current top-state distribution into account when rebalancing. - Map defaultInstanceCapacityMap = new HashMap<>(); - defaultInstanceCapacityMap - .put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixInstanceCapacity()); - clusterConfig.setDefaultInstanceCapacityMap(defaultInstanceCapacityMap); - - // This is how much weight each resource in a cluster has - Map defaultPartitionWeightMap = new HashMap<>(); - defaultPartitionWeightMap - .put(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY, commonConfig.getHelixResourceCapacityWeight()); - clusterConfig.setDefaultPartitionWeightMap(defaultPartitionWeightMap); + clusterConfig.setDefaultInstanceCapacityMap(commonConfig.getHelixDefaultInstanceCapacityMap()); + clusterConfig.setDefaultPartitionWeightMap(commonConfig.getHelixDefaultPartitionWeightMap()); } updateClusterConfigs(controllerClusterName, clusterConfig); diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java index 5764eb6b163..c9e857e3745 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java @@ -1,5 +1,6 @@ package com.linkedin.venice.controller; +import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; import static com.linkedin.venice.ConfigKeys.ACTIVE_ACTIVE_REAL_TIME_SOURCE_FABRIC_LIST; import static com.linkedin.venice.ConfigKeys.ADMIN_HELIX_MESSAGING_CHANNEL_ENABLED; import static com.linkedin.venice.ConfigKeys.CHILD_CLUSTER_ALLOWLIST; @@ -34,12 +35,16 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; import static org.testng.Assert.expectThrows; +import com.linkedin.venice.ConfigKeys; import com.linkedin.venice.PushJobCheckpoints; import com.linkedin.venice.controllerapi.ControllerRoute; +import com.linkedin.venice.exceptions.ConfigurationException; import com.linkedin.venice.exceptions.UndefinedPropertyException; import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.status.protocol.PushJobDetails; @@ -59,6 +64,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.helix.cloud.constants.CloudProvider; import org.apache.helix.model.CloudConfig; +import org.apache.helix.model.ClusterConfig; import org.testng.Assert; import org.testng.annotations.Test; @@ -295,4 +301,172 @@ public void testHelixRestCustomizedHealthUrl() { VeniceControllerClusterConfig clusterConfig = new VeniceControllerClusterConfig(new VeniceProperties(baseProps)); assertEquals(clusterConfig.getHelixRestCustomizedHealthUrl(), healthUrl); } + + @Test + public void testRebalancePreferenceAndCapacityKeys() { + Properties clusterProperties = getBaseSingleRegionProperties(false); + + int helixRebalancePreferenceEvenness = 10; + int helixRebalancePreferenceLessMovement = 1; + int helixRebalancePreferenceForceBaselineConverge = 1; + int helixInstanceCapacity = 10000; + int helixResourceCapacityWeight = 100; + + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + Map helixGlobalRebalancePreference = + clusterConfig.getHelixGlobalRebalancePreference(); + assertNotNull(helixGlobalRebalancePreference); + + assertEquals( + (int) helixGlobalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), + helixRebalancePreferenceEvenness); + assertEquals( + (int) helixGlobalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), + helixRebalancePreferenceLessMovement); + assertEquals( + (int) helixGlobalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + helixRebalancePreferenceForceBaselineConverge); + + List helixInstanceCapacityKeys = clusterConfig.getHelixInstanceCapacityKeys(); + assertEquals(helixInstanceCapacityKeys.size(), 1); + assertEquals(helixInstanceCapacityKeys.get(0), CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY); + + Map helixDefaultInstanceCapacityMap = clusterConfig.getHelixDefaultInstanceCapacityMap(); + assertEquals( + (int) helixDefaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixInstanceCapacity); + + Map helixDefaultPartitionWeightMap = clusterConfig.getHelixDefaultPartitionWeightMap(); + assertEquals( + (int) helixDefaultPartitionWeightMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixResourceCapacityWeight); + } + + @Test + public void testUndefinedRebalancePreferenceAndCapacityKeys() { + Properties clusterProperties = getBaseSingleRegionProperties(false); + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + assertNull(clusterConfig.getHelixGlobalRebalancePreference()); + assertNull(clusterConfig.getHelixInstanceCapacityKeys()); + assertNull(clusterConfig.getHelixDefaultInstanceCapacityMap()); + assertNull(clusterConfig.getHelixDefaultPartitionWeightMap()); + } + + @Test + public void testPartiallyDefinedRebalancePreferenceAndCapacityKeys() { + Properties clusterProperties = getBaseSingleRegionProperties(false); + + int helixRebalancePreferenceEvenness = 10; + int helixRebalancePreferenceLessMovement = 2; + int helixRebalancePreferenceForceBaselineConverge = 1; + int helixInstanceCapacity = 10000; + int helixResourceCapacityWeight = 100; + + // EVENNESS must be defined with LESS_MOVEMENT + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS); + + // LESS_MOVEMENT must be defined with EVENNESS + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT); + + // You can set FORCE_BASELINE_CONVERGE without EVENNESS and LESS_MOVEMENT + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + Map helixGlobalRebalancePreference = + clusterConfig.getHelixGlobalRebalancePreference(); + assertEquals(helixGlobalRebalancePreference.size(), 1); + assertEquals( + (int) helixGlobalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), + helixRebalancePreferenceForceBaselineConverge); + clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE); + + // You can set capacities without rebalance preference + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + clusterConfig = new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + Map helixDefaultInstanceCapacityMap = clusterConfig.getHelixDefaultInstanceCapacityMap(); + assertEquals( + (int) helixDefaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixInstanceCapacity); + + Map helixDefaultPartitionWeightMap = clusterConfig.getHelixDefaultPartitionWeightMap(); + assertEquals( + (int) helixDefaultPartitionWeightMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), + helixResourceCapacityWeight); + } + + @Test + public void testInvalidRebalancePreferenceAndCapacityKeys() { + Properties clusterProperties = getBaseSingleRegionProperties(false); + + int helixRebalancePreferenceEvenness = 10; + int helixRebalancePreferenceLessMovement = -1; + int helixRebalancePreferenceForceBaselineConverge = -1; + int helixInstanceCapacity = 1; + int helixResourceCapacityWeight = 10; + + // Rebalance preference cannot be negative + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + + // Rebalance preference must be < 1000 + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1001); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + + // Rebalance preference cannot be negative + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE); + + // CONTROLLER_HELIX_INSTANCE_CAPACITY cannot be less than CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + + // CONTROLLER_HELIX_INSTANCE_CAPACITY and CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT must be defined together + clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY); + assertThrows( + ConfigurationException.class, + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + } + } diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index 866230e7d9e..d74c11e6f51 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -1,6 +1,7 @@ package com.linkedin.venice.controller; import static com.linkedin.venice.ConfigConstants.CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY; +import static com.linkedin.venice.controller.TestVeniceControllerClusterConfig.getBaseSingleRegionProperties; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyString; @@ -16,12 +17,15 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import com.linkedin.venice.ConfigKeys; import com.linkedin.venice.utils.HelixUtils; +import com.linkedin.venice.utils.VeniceProperties; import java.lang.reflect.Field; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.List; import java.util.Map; +import java.util.Properties; import org.apache.helix.ConfigAccessor; import org.apache.helix.HelixAdmin; import org.apache.helix.manager.zk.ZKHelixManager; @@ -233,7 +237,7 @@ public void testUpdateRESTConfigs() { } @Test - public void testRebalancePreferenceAndCapacityKeys() { + public void testRebalancePreferenceAndCapacityKeys() throws NoSuchFieldException, IllegalAccessException { int helixRebalancePreferenceEvenness = 10; int helixRebalancePreferenceLessMovement = 1; int helixRebalancePreferenceForceBaselineConverge = 1; @@ -242,21 +246,30 @@ public void testRebalancePreferenceAndCapacityKeys() { when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); - when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); - when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); - when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) - .thenReturn(helixRebalancePreferenceForceBaselineConverge); - when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); - when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); - when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); + + Properties clusterProperties = getBaseSingleRegionProperties(false); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties + .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + Field commonConfigsField = ZkHelixAdminClient.class.getDeclaredField("commonConfig"); + commonConfigsField.setAccessible(true); + commonConfigsField.set(zkHelixAdminClient, clusterConfig); doAnswer(invocation -> { String controllerClusterName = invocation.getArgument(0); - ClusterConfig clusterConfig = invocation.getArgument(1); + ClusterConfig helixClusterConfig = invocation.getArgument(1); assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); Map globalRebalancePreference = - clusterConfig.getGlobalRebalancePreference(); + helixClusterConfig.getGlobalRebalancePreference(); assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), helixRebalancePreferenceEvenness); @@ -267,15 +280,15 @@ public void testRebalancePreferenceAndCapacityKeys() { (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), helixRebalancePreferenceForceBaselineConverge); - List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + List instanceCapacityKeys = helixClusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 1); - Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + Map defaultInstanceCapacityMap = helixClusterConfig.getDefaultInstanceCapacityMap(); assertEquals( (int) defaultInstanceCapacityMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixInstanceCapacity); - Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + Map defaultPartitionWeightMap = helixClusterConfig.getDefaultPartitionWeightMap(); assertEquals( (int) defaultPartitionWeightMap.get(CONTROLLER_DEFAULT_HELIX_RESOURCE_CAPACITY_KEY), helixResourceCapacityWeight); @@ -288,44 +301,31 @@ public void testRebalancePreferenceAndCapacityKeys() { @Test public void testUndefinedRebalancePreferenceAndCapacityKeys() { - int helixRebalancePreferenceEvenness = -1; - int helixRebalancePreferenceLessMovement = -1; - int helixRebalancePreferenceForceBaselineConverge = -1; - int helixInstanceCapacity = -1; - int helixResourceCapacityWeight = -1; - when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); - when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); - when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); - when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) - .thenReturn(helixRebalancePreferenceForceBaselineConverge); - when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); - when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); - when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); doAnswer(invocation -> { String controllerClusterName = invocation.getArgument(0); - ClusterConfig clusterConfig = invocation.getArgument(1); + ClusterConfig helixClusterConfig = invocation.getArgument(1); assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); // When you don't specify rebalance preferences, it will use Helix's default settings Map globalRebalancePreference = - clusterConfig.getGlobalRebalancePreference(); + helixClusterConfig.getGlobalRebalancePreference(); assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS), 1); assertEquals((int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT), 1); assertEquals( (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), 0); - List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + List instanceCapacityKeys = helixClusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 0); - Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + Map defaultInstanceCapacityMap = helixClusterConfig.getDefaultInstanceCapacityMap(); assertEquals(defaultInstanceCapacityMap.size(), 0); - Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + Map defaultPartitionWeightMap = helixClusterConfig.getDefaultPartitionWeightMap(); assertEquals(defaultPartitionWeightMap.size(), 0); return null; }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); @@ -335,32 +335,32 @@ public void testUndefinedRebalancePreferenceAndCapacityKeys() { } @Test - public void testPartiallyDefinedRebalancePreference() { - int helixRebalancePreferenceEvenness = 1; - int helixRebalancePreferenceLessMovement = -1; + public void testPartiallyDefinedRebalancePreference() throws NoSuchFieldException, IllegalAccessException { int helixRebalancePreferenceForceBaselineConverge = 1; - int helixInstanceCapacity = -1; - int helixResourceCapacityWeight = -1; when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); - when(mockCommonConfig.getHelixRebalancePreferenceEvenness()).thenReturn(helixRebalancePreferenceEvenness); - when(mockCommonConfig.getHelixRebalancePreferenceLessMovement()).thenReturn(helixRebalancePreferenceLessMovement); - when(mockCommonConfig.getHelixRebalancePreferenceForceBaselineConverge()) - .thenReturn(helixRebalancePreferenceForceBaselineConverge); - when(mockCommonConfig.getHelixInstanceCapacity()).thenReturn(helixInstanceCapacity); - when(mockCommonConfig.getHelixResourceCapacityWeight()).thenReturn(helixResourceCapacityWeight); - when(mockCommonConfig.isControllerClusterHelixCloudEnabled()).thenReturn(false); + + Properties clusterProperties = getBaseSingleRegionProperties(false); + clusterProperties.put( + ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, + helixRebalancePreferenceForceBaselineConverge); + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + Field commonConfigsField = ZkHelixAdminClient.class.getDeclaredField("commonConfig"); + commonConfigsField.setAccessible(true); + commonConfigsField.set(zkHelixAdminClient, clusterConfig); doAnswer(invocation -> { String controllerClusterName = invocation.getArgument(0); - ClusterConfig clusterConfig = invocation.getArgument(1); + ClusterConfig helixClusterConfig = invocation.getArgument(1); assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); // EVENNESS and LESS_MOVEMENT must be defined together. If not it will use Helix's default settings Map globalRebalancePreference = - clusterConfig.getGlobalRebalancePreference(); + helixClusterConfig.getGlobalRebalancePreference(); assertFalse(globalRebalancePreference.containsKey(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS)); assertFalse(globalRebalancePreference.containsKey(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT)); // FORCE_BASELINE_CONVERGE can be defined without setting EVENNESS and LESS_MOVEMENT @@ -368,13 +368,13 @@ public void testPartiallyDefinedRebalancePreference() { (int) globalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), helixRebalancePreferenceForceBaselineConverge); - List instanceCapacityKeys = clusterConfig.getInstanceCapacityKeys(); + List instanceCapacityKeys = helixClusterConfig.getInstanceCapacityKeys(); assertEquals(instanceCapacityKeys.size(), 0); - Map defaultInstanceCapacityMap = clusterConfig.getDefaultInstanceCapacityMap(); + Map defaultInstanceCapacityMap = helixClusterConfig.getDefaultInstanceCapacityMap(); assertEquals(defaultInstanceCapacityMap.size(), 0); - Map defaultPartitionWeightMap = clusterConfig.getDefaultPartitionWeightMap(); + Map defaultPartitionWeightMap = helixClusterConfig.getDefaultPartitionWeightMap(); assertEquals(defaultPartitionWeightMap.size(), 0); return null; }).when(zkHelixAdminClient).updateClusterConfigs(any(), any()); From a8e9c6bc61a2cfec58d899b99a4abb61ba80772a Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Fri, 31 Jan 2025 17:00:13 -0800 Subject: [PATCH 13/16] Fix bug --- .../venice/controller/ZkHelixAdminClient.java | 4 ++-- .../venice/controller/TestZkHelixAdminClient.java | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index f9f93636597..dc76eead708 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -95,12 +95,12 @@ public void createVeniceControllerCluster() { } ClusterConfig clusterConfig = new ClusterConfig(controllerClusterName); clusterConfig.getRecord().setBooleanField(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN, true); - // Topology and fault zone type fields are used by CRUSH alg. Helix would apply the constrains on CRUSH alg to + // Topology and fault zone type fields are used by CRUSH alg. Helix would apply the constraints on CRUSH alg to // choose proper instance to hold the replica. clusterConfig.setTopologyAwareEnabled(false); clusterConfig.setPersistBestPossibleAssignment(true); - if (!commonConfig.getHelixGlobalRebalancePreference().isEmpty()) { + if (commonConfig.getHelixGlobalRebalancePreference() != null) { // We want to prioritize evenness over less movement when it comes to resource assignment, because the cost // of rebalancing for the controller is cheap as it is stateless. clusterConfig.setGlobalRebalancePreference(commonConfig.getHelixGlobalRebalancePreference()); diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index d74c11e6f51..a3b7def23d8 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -300,10 +300,18 @@ public void testRebalancePreferenceAndCapacityKeys() throws NoSuchFieldException } @Test - public void testUndefinedRebalancePreferenceAndCapacityKeys() { + public void testUndefinedRebalancePreferenceAndCapacityKeys() throws NoSuchFieldException, IllegalAccessException { when(zkHelixAdminClient.isVeniceControllerClusterCreated()).thenReturn(false); when(mockHelixAdmin.addCluster(VENICE_CONTROLLER_CLUSTER, false)).thenReturn(true); + Properties clusterProperties = getBaseSingleRegionProperties(false); + VeniceControllerClusterConfig clusterConfig = + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + + Field commonConfigsField = ZkHelixAdminClient.class.getDeclaredField("commonConfig"); + commonConfigsField.setAccessible(true); + commonConfigsField.set(zkHelixAdminClient, clusterConfig); + doAnswer(invocation -> { String controllerClusterName = invocation.getArgument(0); ClusterConfig helixClusterConfig = invocation.getArgument(1); @@ -358,7 +366,6 @@ public void testPartiallyDefinedRebalancePreference() throws NoSuchFieldExceptio assertEquals(controllerClusterName, VENICE_CONTROLLER_CLUSTER); - // EVENNESS and LESS_MOVEMENT must be defined together. If not it will use Helix's default settings Map globalRebalancePreference = helixClusterConfig.getGlobalRebalancePreference(); assertFalse(globalRebalancePreference.containsKey(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS)); From 20fea5c53e814aa44740438b48b6cca07aec60b1 Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Mon, 10 Feb 2025 13:42:16 -0800 Subject: [PATCH 14/16] Address review comments --- .../venice/controller/ZkHelixAdminClient.java | 2 +- .../TestVeniceControllerClusterConfig.java | 61 ++++++++++--------- .../controller/TestZkHelixAdminClient.java | 4 +- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index dc76eead708..b2694a7b595 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -41,7 +41,7 @@ public class ZkHelixAdminClient implements HelixAdminClient { private static final String CONTROLLER_HAAS_ZK_CLIENT_NAME = "controller-zk-client-for-haas-admin"; private final HelixAdmin helixAdmin; - private final ConfigAccessor helixConfigAccessor; + ConfigAccessor helixConfigAccessor; private final VeniceControllerClusterConfig commonConfig; private final VeniceControllerMultiClusterConfig multiClusterConfigs; private final String haasSuperClusterName; diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java index c9e857e3745..a5d8e53f2eb 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java @@ -367,7 +367,6 @@ public void testUndefinedRebalancePreferenceAndCapacityKeys() { @Test public void testPartiallyDefinedRebalancePreferenceAndCapacityKeys() { - Properties clusterProperties = getBaseSingleRegionProperties(false); int helixRebalancePreferenceEvenness = 10; int helixRebalancePreferenceLessMovement = 2; @@ -376,38 +375,39 @@ public void testPartiallyDefinedRebalancePreferenceAndCapacityKeys() { int helixResourceCapacityWeight = 100; // EVENNESS must be defined with LESS_MOVEMENT - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + Properties clusterProperties1 = getBaseSingleRegionProperties(false); + clusterProperties1.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); - clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties1))); // LESS_MOVEMENT must be defined with EVENNESS - clusterProperties + Properties clusterProperties2 = getBaseSingleRegionProperties(false); + clusterProperties2 .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); - clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties2))); // You can set FORCE_BASELINE_CONVERGE without EVENNESS and LESS_MOVEMENT - clusterProperties.put( + Properties clusterProperties3 = getBaseSingleRegionProperties(false); + clusterProperties3.put( ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, helixRebalancePreferenceForceBaselineConverge); VeniceControllerClusterConfig clusterConfig = - new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties3)); Map helixGlobalRebalancePreference = clusterConfig.getHelixGlobalRebalancePreference(); assertEquals(helixGlobalRebalancePreference.size(), 1); assertEquals( (int) helixGlobalRebalancePreference.get(ClusterConfig.GlobalRebalancePreferenceKey.FORCE_BASELINE_CONVERGE), helixRebalancePreferenceForceBaselineConverge); - clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE); // You can set capacities without rebalance preference - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); - clusterConfig = new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties)); + Properties clusterProperties4 = getBaseSingleRegionProperties(false); + clusterProperties4.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties4.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + clusterConfig = new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties4)); Map helixDefaultInstanceCapacityMap = clusterConfig.getHelixDefaultInstanceCapacityMap(); assertEquals( @@ -422,8 +422,6 @@ public void testPartiallyDefinedRebalancePreferenceAndCapacityKeys() { @Test public void testInvalidRebalancePreferenceAndCapacityKeys() { - Properties clusterProperties = getBaseSingleRegionProperties(false); - int helixRebalancePreferenceEvenness = 10; int helixRebalancePreferenceLessMovement = -1; int helixRebalancePreferenceForceBaselineConverge = -1; @@ -431,42 +429,47 @@ public void testInvalidRebalancePreferenceAndCapacityKeys() { int helixResourceCapacityWeight = 10; // Rebalance preference cannot be negative - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); - clusterProperties + Properties clusterProperties1 = getBaseSingleRegionProperties(false); + clusterProperties1.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties1 .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties1))); // Rebalance preference must be < 1000 - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1001); + Properties clusterProperties2 = getBaseSingleRegionProperties(false); + clusterProperties2.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_EVENNESS, helixRebalancePreferenceEvenness); + clusterProperties2.put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, 1001); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties2))); // Rebalance preference cannot be negative - clusterProperties + Properties clusterProperties3 = getBaseSingleRegionProperties(false); + clusterProperties3 .put(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_LESS_MOVEMENT, helixRebalancePreferenceLessMovement); - clusterProperties.put( + clusterProperties3.put( ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE, helixRebalancePreferenceForceBaselineConverge); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); - clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_REBALANCE_PREFERENCE_FORCE_BASELINE_CONVERGE); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties3))); // CONTROLLER_HELIX_INSTANCE_CAPACITY cannot be less than CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); - clusterProperties.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); + Properties clusterProperties4 = getBaseSingleRegionProperties(false); + clusterProperties4.put(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY, helixInstanceCapacity); + clusterProperties4.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties4))); // CONTROLLER_HELIX_INSTANCE_CAPACITY and CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT must be defined together - clusterProperties.remove(ConfigKeys.CONTROLLER_HELIX_INSTANCE_CAPACITY); + Properties clusterProperties5 = getBaseSingleRegionProperties(false); + clusterProperties5.put(ConfigKeys.CONTROLLER_HELIX_RESOURCE_CAPACITY_WEIGHT, helixResourceCapacityWeight); assertThrows( ConfigurationException.class, - () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties))); + () -> new VeniceControllerClusterConfig(new VeniceProperties(clusterProperties5))); } } diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index a3b7def23d8..359523dd1a1 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -59,9 +59,7 @@ public void setUp() throws NoSuchFieldException, IllegalAccessException { helixAdminField.setAccessible(true); helixAdminField.set(zkHelixAdminClient, mockHelixAdmin); - Field helixConfigAccessorField = ZkHelixAdminClient.class.getDeclaredField("helixConfigAccessor"); - helixConfigAccessorField.setAccessible(true); - helixConfigAccessorField.set(zkHelixAdminClient, mockHelixConfigAccessor); + zkHelixAdminClient.helixConfigAccessor = mockHelixConfigAccessor; Field multiClusterConfigsField = ZkHelixAdminClient.class.getDeclaredField("multiClusterConfigs"); multiClusterConfigsField.setAccessible(true); From 3f3020abd95ba484844882e69870cfce388f371f Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Mon, 10 Feb 2025 13:54:23 -0800 Subject: [PATCH 15/16] Fix broken test --- .../com/linkedin/venice/controller/TestHAASController.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java index 79d12131608..97958a7c627 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java @@ -293,7 +293,9 @@ public void testTransitionToHAASControllerAsStorageClusterLeader() { @Test(timeOut = 60 * Time.MS_PER_SECOND) public void testRebalancePreferenceAndCapacityKeys() { - try (VeniceClusterWrapper venice = ServiceFactory.getVeniceCluster(0, 0, 0, 1); + VeniceClusterCreateOptions options = + new VeniceClusterCreateOptions.Builder().numberOfControllers(0).numberOfServers(0).numberOfRouters(0).build(); + try (VeniceClusterWrapper venice = ServiceFactory.getVeniceCluster(options); HelixAsAServiceWrapper helixAsAServiceWrapper = startAndWaitForHAASToBeAvailable(venice.getZk().getAddress())) { String controllerClusterName = "venice-controllers"; From f9a009e7a742fcce27c55dd4afac264a5fec4bdb Mon Sep 17 00:00:00 2001 From: Koorous Vargha Date: Tue, 11 Feb 2025 09:46:06 -0800 Subject: [PATCH 16/16] Make helixConfigAccessor final --- .../com/linkedin/venice/controller/ZkHelixAdminClient.java | 2 +- .../linkedin/venice/controller/TestZkHelixAdminClient.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java index b2694a7b595..dc76eead708 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java @@ -41,7 +41,7 @@ public class ZkHelixAdminClient implements HelixAdminClient { private static final String CONTROLLER_HAAS_ZK_CLIENT_NAME = "controller-zk-client-for-haas-admin"; private final HelixAdmin helixAdmin; - ConfigAccessor helixConfigAccessor; + private final ConfigAccessor helixConfigAccessor; private final VeniceControllerClusterConfig commonConfig; private final VeniceControllerMultiClusterConfig multiClusterConfigs; private final String haasSuperClusterName; diff --git a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java index 359523dd1a1..a3b7def23d8 100644 --- a/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java +++ b/services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java @@ -59,7 +59,9 @@ public void setUp() throws NoSuchFieldException, IllegalAccessException { helixAdminField.setAccessible(true); helixAdminField.set(zkHelixAdminClient, mockHelixAdmin); - zkHelixAdminClient.helixConfigAccessor = mockHelixConfigAccessor; + Field helixConfigAccessorField = ZkHelixAdminClient.class.getDeclaredField("helixConfigAccessor"); + helixConfigAccessorField.setAccessible(true); + helixConfigAccessorField.set(zkHelixAdminClient, mockHelixConfigAccessor); Field multiClusterConfigsField = ZkHelixAdminClient.class.getDeclaredField("multiClusterConfigs"); multiClusterConfigsField.setAccessible(true);