Skip to content

Commit 4cf1d0a

Browse files
[controller] Enable configuring a customized health check URL for storage clusters (#1273)
Helix has added new functionality that allows configuring a customized health check endpoint, which Helix invokes to determine the health of the cluster. This URL can be configured by setting the controller config `controller.helix.rest.customized.health.url`.
1 parent ddaa0bb commit 4cf1d0a

File tree

9 files changed

+119
-18
lines changed

9 files changed

+119
-18
lines changed

internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,12 @@ private ConfigKeys() {
350350
*/
351351
public static final String CONTROLLER_HELIX_CLOUD_INFO_PROCESSOR_NAME = "controller.helix.cloud.info.processor.name";
352352

353+
/**
354+
* Base URL for customized health checks triggered by Helix. Default is empty string.
355+
*/
356+
public static final String CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL =
357+
"controller.helix.rest.customized.health.url";
358+
353359
/**
354360
* Whether to enable graveyard cleanup for batch-only store at cluster level. Default is false.
355361
*/

internal/venice-common/src/main/java/com/linkedin/venice/controllerapi/AggregatedHealthStatusRequest.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package com.linkedin.venice.controllerapi;
22

3+
import static com.linkedin.venice.controllerapi.ControllerApiConstants.CLUSTER_ID;
4+
import static com.linkedin.venice.controllerapi.ControllerApiConstants.INSTANCES;
5+
import static com.linkedin.venice.controllerapi.ControllerApiConstants.TO_BE_STOPPED_INSTANCES;
6+
37
import com.fasterxml.jackson.annotation.JsonCreator;
48
import com.fasterxml.jackson.annotation.JsonProperty;
59
import java.util.Collections;
@@ -13,16 +17,16 @@ public class AggregatedHealthStatusRequest {
1317

1418
@JsonCreator
1519
public AggregatedHealthStatusRequest(
16-
@JsonProperty("cluster_id") String cluster_id,
17-
@JsonProperty("instances") List<String> instances,
18-
@JsonProperty("to_be_stopped_instances") List<String> to_be_stopped_instances) {
20+
@JsonProperty(CLUSTER_ID) String cluster_id,
21+
@JsonProperty(INSTANCES) List<String> instances,
22+
@JsonProperty(TO_BE_STOPPED_INSTANCES) List<String> to_be_stopped_instances) {
1923
if (cluster_id == null) {
20-
throw new IllegalArgumentException("'cluster_id' is required");
24+
throw new IllegalArgumentException("'" + CLUSTER_ID + "' is required");
2125
}
2226
this.cluster_id = cluster_id;
2327

2428
if (instances == null) {
25-
throw new IllegalArgumentException("'instances' is required");
29+
throw new IllegalArgumentException("'" + INSTANCES + "' is required");
2630
}
2731
this.instances = instances;
2832

@@ -33,17 +37,17 @@ public AggregatedHealthStatusRequest(
3337
}
3438
}
3539

36-
@JsonProperty("cluster_id")
40+
@JsonProperty(CLUSTER_ID)
3741
public String getClusterId() {
3842
return cluster_id;
3943
}
4044

41-
@JsonProperty("instances")
45+
@JsonProperty(INSTANCES)
4246
public List<String> getInstances() {
4347
return instances;
4448
}
4549

46-
@JsonProperty("to_be_stopped_instances")
50+
@JsonProperty(TO_BE_STOPPED_INSTANCES)
4751
public List<String> getToBeStoppedInstances() {
4852
return to_be_stopped_instances;
4953
}

internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/controller/TestHAASController.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ public Void call() {
272272
client.addClusterToGrandCluster("venice-controllers");
273273
for (int i = 0; i < 10; i++) {
274274
String clusterName = "cluster-" + i;
275-
client.createVeniceStorageCluster(clusterName, new ClusterConfig(clusterName));
275+
client.createVeniceStorageCluster(clusterName, new ClusterConfig(clusterName), null);
276276
client.addClusterToGrandCluster(clusterName);
277277
client.addVeniceStorageClusterToControllerCluster(clusterName);
278278
}

services/venice-controller/src/main/java/com/linkedin/venice/controller/HelixAdminClient.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.util.List;
44
import java.util.Map;
55
import org.apache.helix.model.ClusterConfig;
6+
import org.apache.helix.model.RESTConfig;
67

78

89
/**
@@ -31,8 +32,9 @@ public interface HelixAdminClient {
3132
* Create and configure the Venice storage cluster.
3233
* @param clusterName of the Venice storage cluster.
3334
* @param clusterConfig {@link ClusterConfig} for the new cluster.
35+
* @param restConfig {@link RESTConfig} for the new cluster.
3436
*/
35-
void createVeniceStorageCluster(String clusterName, ClusterConfig clusterConfig);
37+
void createVeniceStorageCluster(String clusterName, ClusterConfig clusterConfig, RESTConfig restConfig);
3638

3739
/**
3840
* Check if the given Venice storage cluster's cluster resource is in the Venice controller cluster.
@@ -67,6 +69,13 @@ public interface HelixAdminClient {
6769
*/
6870
void updateClusterConfigs(String clusterName, ClusterConfig clusterConfig);
6971

72+
/**
73+
* Update some Helix cluster properties for the given cluster.
74+
* @param clusterName of the cluster to be updated.
75+
* @param restConfig {@link RESTConfig} for the new cluster.
76+
*/
77+
void updateRESTConfigs(String clusterName, RESTConfig restConfig);
78+
7079
/**
7180
* Disable or enable a list of partitions on an instance.
7281
*/

services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceControllerClusterConfig.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_PROCESSOR_NAME;
5353
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_SOURCES;
5454
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_PROVIDER;
55+
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL;
5556
import static com.linkedin.venice.ConfigKeys.CONTROLLER_INSTANCE_TAG_LIST;
5657
import static com.linkedin.venice.ConfigKeys.CONTROLLER_JETTY_CONFIG_OVERRIDE_PREFIX;
5758
import static com.linkedin.venice.ConfigKeys.CONTROLLER_MIN_SCHEMA_COUNT_TO_KEEP;
@@ -368,6 +369,8 @@ public class VeniceControllerClusterConfig {
368369
private final boolean storageClusterHelixCloudEnabled;
369370
private final CloudConfig helixCloudConfig;
370371

372+
private final String helixRestCustomizedHealthUrl;
373+
371374
private final boolean usePushStatusStoreForIncrementalPushStatusReads;
372375

373376
private final long metaStoreWriterCloseTimeoutInMS;
@@ -930,6 +933,8 @@ public VeniceControllerClusterConfig(VeniceProperties props) {
930933
helixCloudConfig = null;
931934
}
932935

936+
this.helixRestCustomizedHealthUrl = props.getString(CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL, "");
937+
933938
this.unregisterMetricForDeletedStoreEnabled = props.getBoolean(UNREGISTER_METRIC_FOR_DELETED_STORE_ENABLED, false);
934939
this.identityParserClassName = props.getString(IDENTITY_PARSER_CLASS, DefaultIdentityParser.class.getName());
935940
this.storeGraveyardCleanupEnabled = props.getBoolean(CONTROLLER_STORE_GRAVEYARD_CLEANUP_ENABLED, false);
@@ -1580,6 +1585,10 @@ public CloudConfig getHelixCloudConfig() {
15801585
return helixCloudConfig;
15811586
}
15821587

1588+
public String getHelixRestCustomizedHealthUrl() {
1589+
return helixRestCustomizedHealthUrl;
1590+
}
1591+
15831592
public boolean usePushStatusStoreForIncrementalPush() {
15841593
return usePushStatusStoreForIncrementalPushStatusReads;
15851594
}

services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceHelixAdmin.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@
274274
import org.apache.helix.model.LeaderStandbySMD;
275275
import org.apache.helix.model.LiveInstance;
276276
import org.apache.helix.model.MaintenanceSignal;
277+
import org.apache.helix.model.RESTConfig;
277278
import org.apache.helix.model.builder.HelixConfigScopeBuilder;
278279
import org.apache.helix.participant.StateMachineEngine;
279280
import org.apache.helix.zookeeper.datamodel.ZNRecord;
@@ -6298,7 +6299,13 @@ private void setupStorageClusterAsNeeded(String clusterName) {
62986299
helixClusterConfig.setTopology("/" + HelixUtils.TOPOLOGY_CONSTRAINT);
62996300
helixClusterConfig.setFaultZoneType(HelixUtils.TOPOLOGY_CONSTRAINT);
63006301

6301-
helixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig);
6302+
RESTConfig restConfig = null;
6303+
if (!StringUtils.isEmpty(clusterConfigs.getHelixRestCustomizedHealthUrl())) {
6304+
restConfig = new RESTConfig(clusterName);
6305+
restConfig.set(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL, clusterConfigs.getHelixRestCustomizedHealthUrl());
6306+
}
6307+
6308+
helixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig, restConfig);
63026309
}
63036310
if (!helixAdminClient.isClusterInGrandCluster(clusterName)) {
63046311
helixAdminClient.addClusterToGrandCluster(clusterName);

services/venice-controller/src/main/java/com/linkedin/venice/controller/ZkHelixAdminClient.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.helix.model.IdealState;
2525
import org.apache.helix.model.InstanceConfig;
2626
import org.apache.helix.model.LeaderStandbySMD;
27+
import org.apache.helix.model.RESTConfig;
2728
import org.apache.helix.model.builder.HelixConfigScopeBuilder;
2829
import org.apache.helix.zookeeper.impl.client.ZkClient;
2930
import org.apache.logging.log4j.LogManager;
@@ -114,10 +115,10 @@ public void createVeniceControllerCluster() {
114115
}
115116

116117
/**
117-
* @see HelixAdminClient#createVeniceStorageCluster(String, ClusterConfig)
118+
* @see HelixAdminClient#createVeniceStorageCluster(String, ClusterConfig, RESTConfig)
118119
*/
119120
@Override
120-
public void createVeniceStorageCluster(String clusterName, ClusterConfig helixClusterConfig) {
121+
public void createVeniceStorageCluster(String clusterName, ClusterConfig helixClusterConfig, RESTConfig restConfig) {
121122
boolean success = RetryUtils.executeWithMaxAttempt(() -> {
122123
if (!isVeniceStorageClusterCreated(clusterName)) {
123124
if (!helixAdmin.addCluster(clusterName, false)) {
@@ -130,6 +131,10 @@ public void createVeniceStorageCluster(String clusterName, ClusterConfig helixCl
130131
if (clusterConfig.isStorageClusterHelixCloudEnabled()) {
131132
helixAdmin.addCloudConfig(clusterName, clusterConfig.getHelixCloudConfig());
132133
}
134+
135+
if (restConfig != null) {
136+
updateRESTConfigs(clusterName, restConfig);
137+
}
133138
}
134139
return true;
135140
}, 3, Duration.ofSeconds(5), Collections.singletonList(Exception.class));
@@ -215,6 +220,17 @@ public void updateClusterConfigs(String clusterName, ClusterConfig clusterConfig
215220
helixAdmin.setConfig(configScope, helixClusterProperties);
216221
}
217222

223+
/**
224+
* @see HelixAdminClient#updateRESTConfigs(String, RESTConfig)
225+
*/
226+
@Override
227+
public void updateRESTConfigs(String clusterName, RESTConfig restConfig) {
228+
HelixConfigScope configScope =
229+
new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.REST).forCluster(clusterName).build();
230+
Map<String, String> helixRestProperties = new HashMap<>(restConfig.getRecord().getSimpleFields());
231+
helixAdmin.setConfig(configScope, helixRestProperties);
232+
}
233+
218234
/**
219235
* @see HelixAdminClient#enablePartition(boolean, String, String, String, List)
220236
*/

services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceControllerClusterConfig.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_PROCESSOR_NAME;
1515
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_INFO_SOURCES;
1616
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_CLOUD_PROVIDER;
17+
import static com.linkedin.venice.ConfigKeys.CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL;
1718
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_MODE;
1819
import static com.linkedin.venice.ConfigKeys.CONTROLLER_SSL_ENABLED;
1920
import static com.linkedin.venice.ConfigKeys.CONTROLLER_STORAGE_CLUSTER_HELIX_CLOUD_ENABLED;
@@ -283,4 +284,15 @@ private void validateCloudConfig(
283284
assertEquals(cloudConfig.getCloudInfoProcessorName(), processorName);
284285
assertEquals(cloudConfig.getCloudInfoSources(), cloudInfoSources);
285286
}
287+
288+
@Test
289+
public void testHelixRestCustomizedHealthUrl() {
290+
Properties baseProps = getBaseSingleRegionProperties(false);
291+
292+
String healthUrl = "http://localhost:8080/health";
293+
baseProps.setProperty(CONTROLLER_HELIX_REST_CUSTOMIZED_HEALTH_URL, healthUrl);
294+
295+
VeniceControllerClusterConfig clusterConfig = new VeniceControllerClusterConfig(new VeniceProperties(baseProps));
296+
assertEquals(clusterConfig.getHelixRestCustomizedHealthUrl(), healthUrl);
297+
}
286298
}

services/venice-controller/src/test/java/com/linkedin/venice/controller/TestZkHelixAdminClient.java

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.helix.model.ClusterConfig;
2727
import org.apache.helix.model.HelixConfigScope;
2828
import org.apache.helix.model.IdealState;
29+
import org.apache.helix.model.RESTConfig;
2930
import org.testng.annotations.BeforeMethod;
3031
import org.testng.annotations.Test;
3132

@@ -120,25 +121,35 @@ public void testCreateVeniceStorageCluster() {
120121
when(mockMultiClusterConfigs.getControllerConfig(clusterName)).thenReturn(mockClusterConfig);
121122

122123
doReturn(true).when(mockHelixAdmin).addCluster(clusterName, false);
123-
doCallRealMethod().when(zkHelixAdminClient).createVeniceStorageCluster(any(), any());
124+
doCallRealMethod().when(zkHelixAdminClient).createVeniceStorageCluster(any(), any(), any());
124125

125-
// When the cluster is not Helix cloud enabled
126126
ClusterConfig helixClusterConfig = mock(ClusterConfig.class);
127-
zkHelixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig);
127+
zkHelixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig, null);
128128

129129
verify(zkHelixAdminClient).updateClusterConfigs(clusterName, helixClusterConfig);
130130
verify(mockHelixAdmin, never()).addCloudConfig(any(), any());
131+
verify(zkHelixAdminClient, never()).updateRESTConfigs(any(), any());
131132

132133
clearInvocations(zkHelixAdminClient);
133134

134-
// When the cluster is Helix cloud enabled
135135
doReturn(true).when(mockClusterConfig).isStorageClusterHelixCloudEnabled();
136136
CloudConfig cloudConfig = mock(CloudConfig.class);
137137
doReturn(cloudConfig).when(mockClusterConfig).getHelixCloudConfig();
138-
zkHelixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig);
138+
zkHelixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig, null);
139139

140140
verify(zkHelixAdminClient).updateClusterConfigs(clusterName, helixClusterConfig);
141141
verify(mockHelixAdmin).addCloudConfig(clusterName, cloudConfig);
142+
verify(zkHelixAdminClient, never()).updateRESTConfigs(any(), any());
143+
144+
clearInvocations(zkHelixAdminClient, mockHelixAdmin);
145+
doReturn(false).when(mockClusterConfig).isStorageClusterHelixCloudEnabled();
146+
147+
RESTConfig restConfig = mock(RESTConfig.class);
148+
zkHelixAdminClient.createVeniceStorageCluster(clusterName, helixClusterConfig, restConfig);
149+
150+
verify(zkHelixAdminClient).updateClusterConfigs(clusterName, helixClusterConfig);
151+
verify(mockHelixAdmin, never()).addCloudConfig(any(), any());
152+
verify(zkHelixAdminClient).updateRESTConfigs(clusterName, restConfig);
142153
}
143154

144155
@Test
@@ -183,4 +194,31 @@ public void testUpdateClusterConfigs() {
183194

184195
zkHelixAdminClient.updateClusterConfigs(clusterName, clusterConfig);
185196
}
197+
198+
@Test
199+
public void testUpdateRESTConfigs() {
200+
doCallRealMethod().when(zkHelixAdminClient).updateRESTConfigs(anyString(), any());
201+
202+
String clusterName = "testCluster";
203+
String restUrl = "http://localhost:8080";
204+
RESTConfig restConfig = new RESTConfig(clusterName);
205+
206+
restConfig.set(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL, restUrl);
207+
restConfig.getRecord().setSimpleField("FIELD1", "VALUE1");
208+
209+
doAnswer(invocation -> {
210+
HelixConfigScope scope = invocation.getArgument(0);
211+
Map<String, String> restProps = invocation.getArgument(1);
212+
213+
assertEquals(scope.getType(), HelixConfigScope.ConfigScopeProperty.REST);
214+
assertEquals(scope.getClusterName(), clusterName);
215+
assertEquals(restProps.size(), 2);
216+
assertEquals(restProps.get(RESTConfig.SimpleFields.CUSTOMIZED_HEALTH_URL.name()), restUrl);
217+
assertEquals(restProps.get("FIELD1"), "VALUE1");
218+
219+
return null;
220+
}).when(mockHelixAdmin).setConfig(any(), any());
221+
222+
zkHelixAdminClient.updateRESTConfigs(clusterName, restConfig);
223+
}
186224
}

0 commit comments

Comments
 (0)