Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[controller] Checking parent controller region state before handling requests for all routes #1075

Merged
merged 7 commits into from
Jul 29, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,17 @@ private ConfigKeys() {
/** Whether current controller is parent or not */
public static final String CONTROLLER_PARENT_MODE = "controller.parent.mode";

/**
* This config specifies the state of the region of the parent controller.
*
* The region can be in one of the following states:
* ACTIVE: the parent controller in the region is serving requests.
* PASSIVE: the parent controller in the region is rejecting requests.
*
* By default, this is set to ACTIVE.
*/
public static final String CONTROLLER_PARENT_REGION_STATE = "controller.parent.region.state";

/**
* This config is used to control how many errored topics we are going to keep in parent cluster.
* This is mostly used to investigate the Kafka missing message issue.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.eq;

import com.linkedin.venice.controller.ParentControllerRegionState;
import com.linkedin.venice.controller.VeniceHelixAdmin;
import com.linkedin.venice.controllerapi.ControllerApiConstants;
import com.linkedin.venice.controllerapi.ControllerRoute;
Expand Down Expand Up @@ -205,6 +206,7 @@ public void testAAIncrementalPushRTSourceRegion(boolean sourceGridFabricPresent,
mockStore.setIncrementalPushEnabled(true);
doReturn(mockStore).when(admin).getStore(anyString(), anyString());
doReturn(true).when(admin).isParent();
doReturn(ParentControllerRegionState.ACTIVE).when(admin).getParentControllerRegionState();
doReturn(true).when(admin).isLeaderControllerFor(anyString());
doReturn(1).when(admin).getReplicationFactor(anyString(), anyString());
doReturn(1).when(admin).calculateNumberOfPartitions(anyString(), anyString());
Expand Down Expand Up @@ -330,6 +332,7 @@ public void testSamzaReplicationPolicyMode(boolean samzaPolicy, boolean storePol
doReturn("kafka-bootstrap").when(admin).getKafkaBootstrapServers(anyBoolean());
doReturn("store_rt").when(admin).getRealTimeTopic(anyString(), anyString());
doReturn(samzaPolicy).when(admin).isParent();
doReturn(ParentControllerRegionState.ACTIVE).when(admin).getParentControllerRegionState();
doReturn(aaEnabled).when(admin).isActiveActiveReplicationEnabledInAllRegion(anyString(), anyString(), eq(true));
mockStore.setActiveActiveReplicationEnabled(aaEnabled);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,13 @@ void updateRoutersClusterConfig(
*/
boolean isParent();

/**
* Return the state of the region of the parent controller.
* @return {@link ParentControllerRegionState#ACTIVE} which means that the parent controller in the region is serving requests.
* Otherwise, return {@link ParentControllerRegionState#PASSIVE}
*/
ParentControllerRegionState getParentControllerRegionState();

/**
* Get child datacenter to child controller url mapping.
* @return A map of child datacenter -> child controller url
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.linkedin.venice.controller;

/**
* Enum representing the state of the region where the parent controller resides.
* i.e., Region dc-0 is ACTIVE while Region dc-1 is PASSIVE
* This means that ParentController in dc-0 is serving requests while ParentController in dc-1 is rejecting requests
*/
public enum ParentControllerRegionState {
/** The region is active, so the parent controller in the region is serving requests */
ACTIVE,
/**
* The region is passive, so the parent controller in the region is rejecting requests.
* This region is ready to take over if components in the currently active region fails
*/
PASSIVE
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import static com.linkedin.venice.ConfigKeys.CONTROLLER_NAME;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_EXTERNAL_SUPERSET_SCHEMA_GENERATION_ENABLED;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_MODE;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_REGION_STATE;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_HEARTBEAT_CHECK_WAIT_TIME_SECONDS;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_REPAIR_CHECK_INTERVAL_SECONDS;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_REPAIR_RETRY_COUNT;
Expand Down Expand Up @@ -157,6 +158,7 @@
import static com.linkedin.venice.SSLConfig.DEFAULT_CONTROLLER_SSL_ENABLED;
import static com.linkedin.venice.VeniceConstants.DEFAULT_PER_ROUTER_READ_QUOTA;
import static com.linkedin.venice.VeniceConstants.DEFAULT_SSL_FACTORY_CLASS_NAME;
import static com.linkedin.venice.controller.ParentControllerRegionState.ACTIVE;
import static com.linkedin.venice.pubsub.PubSubConstants.DEFAULT_KAFKA_REPLICATION_FACTOR;
import static com.linkedin.venice.pubsub.PubSubConstants.PUBSUB_TOPIC_MANAGER_METADATA_FETCHER_CONSUMER_POOL_SIZE_DEFAULT_VALUE;
import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB;
Expand Down Expand Up @@ -221,6 +223,7 @@ public class VeniceControllerClusterConfig {
private final String controllerClusterZkAddress;
private final boolean multiRegion;
private final boolean parent;
private final ParentControllerRegionState parentControllerRegionState;
private final Map<String, String> childDataCenterControllerUrlMap;
private final String d2ServiceName;
private final String clusterDiscoveryD2ServiceName;
Expand Down Expand Up @@ -631,6 +634,8 @@ public VeniceControllerClusterConfig(VeniceProperties props) {
this.controllerClusterReplica = props.getInt(CONTROLLER_CLUSTER_REPLICA, 3);
this.controllerClusterZkAddress = props.getString(CONTROLLER_CLUSTER_ZK_ADDRESSS, getZkAddress());
this.parent = props.getBoolean(CONTROLLER_PARENT_MODE, false);
this.parentControllerRegionState =
ParentControllerRegionState.valueOf(props.getString(CONTROLLER_PARENT_REGION_STATE, ACTIVE.name()));

if (childDatacenters.isEmpty()) {
this.childDataCenterControllerUrlMap = Collections.emptyMap();
Expand Down Expand Up @@ -1162,6 +1167,10 @@ public boolean isParent() {
return parent;
}

public ParentControllerRegionState getParentControllerRegionState() {
return parentControllerRegionState;
}

public long getDeprecatedJobTopicRetentionMs() {
return deprecatedJobTopicRetentionMs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ public boolean isParent() {
return getCommonConfig().isParent();
}

public ParentControllerRegionState getParentControllerRegionState() {
return getCommonConfig().getParentControllerRegionState();
}

public String getControllerName() {
return getCommonConfig().getControllerName();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7553,6 +7553,14 @@ public boolean isParent() {
return multiClusterConfigs.isParent();
}

/**
* @see Admin#getParentControllerRegionState()
*/
@Override
public ParentControllerRegionState getParentControllerRegionState() {
return multiClusterConfigs.getParentControllerRegionState();
}

/**
* @see Admin#getChildDataCenterControllerUrlMap(String)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4871,6 +4871,14 @@ public boolean isParent() {
return getVeniceHelixAdmin().isParent();
}

/**
* @see Admin#getParentControllerRegionState()
*/
@Override
public ParentControllerRegionState getParentControllerRegionState() {
return getVeniceHelixAdmin().getParentControllerRegionState();
}

/**
* @see Admin#getChildDataCenterControllerUrlMap(String)
*/
Expand Down
Loading
Loading