Skip to content

Commit a7dbb18

Browse files
committed
add delayed ingestion in dvc for target region push
1 parent 2a34ca2 commit a7dbb18

File tree

4 files changed

+211
-126
lines changed

4 files changed

+211
-126
lines changed

clients/da-vinci-client/src/main/java/com/linkedin/davinci/StoreBackend.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,19 @@
44
import com.linkedin.venice.exceptions.VeniceException;
55
import com.linkedin.venice.meta.Store;
66
import com.linkedin.venice.meta.Version;
7+
import com.linkedin.venice.meta.VersionStatus;
78
import com.linkedin.venice.serialization.AvroStoreDeserializerCache;
89
import com.linkedin.venice.serialization.StoreDeserializerCache;
910
import com.linkedin.venice.utils.ComplementSet;
1011
import com.linkedin.venice.utils.ConcurrentRef;
1112
import com.linkedin.venice.utils.ReferenceCounted;
13+
import com.linkedin.venice.utils.RegionUtils;
1214
import java.util.HashSet;
1315
import java.util.Optional;
1416
import java.util.Set;
1517
import java.util.concurrent.CompletableFuture;
1618
import java.util.concurrent.CompletionException;
19+
import org.apache.commons.lang.StringUtils;
1720
import org.apache.logging.log4j.LogManager;
1821
import org.apache.logging.log4j.Logger;
1922

@@ -240,9 +243,24 @@ synchronized void trySubscribeDaVinciFutureVersion() {
240243
} else {
241244
return;
242245
}
243-
LOGGER.info("Subscribing to future version {}", targetVersion.kafkaTopicName());
244-
setDaVinciFutureVersion(new VersionBackend(backend, targetVersion, stats));
245-
daVinciFutureVersion.subscribe(subscription).whenComplete((v, e) -> trySwapDaVinciCurrentVersion(e));
246+
247+
Store store = backend.getStoreRepository().getStoreOrThrow(storeName);
248+
Set<String> targetRegions = RegionUtils.parseRegionsFilterList(store.getTargetSwapRegion());
249+
String currentRegion = backend.getConfigLoader().getVeniceServerConfig().getRegionName();
250+
boolean isTargetRegionEnabled = !StringUtils.isEmpty(store.getTargetSwapRegion());
251+
boolean startIngestionInNonTargetRegion =
252+
isTargetRegionEnabled && targetVersion.getStatus() == VersionStatus.ONLINE;
253+
254+
// Subscribe to the future version if:
255+
// 1. Target region push with delayed ingestion is not enabled
256+
// 2. Target region push with delayed ingestion is enabled and the current region is a target region
257+
// 3. Target region push with delayed ingestion is enabled and the current region is a non target region
258+
// and the wait time has elapsed. The wait time has elapsed when the version status is marked ONLINE
259+
if (targetRegions.contains(currentRegion) || startIngestionInNonTargetRegion || !isTargetRegionEnabled) {
260+
LOGGER.info("Subscribing to future version {}", targetVersion.kafkaTopicName());
261+
setDaVinciFutureVersion(new VersionBackend(backend, targetVersion, stats));
262+
daVinciFutureVersion.subscribe(subscription).whenComplete((v, e) -> trySwapDaVinciCurrentVersion(e));
263+
}
246264
}
247265

248266
/**
@@ -373,4 +391,8 @@ private void swapCurrentVersion() {
373391
public StoreDeserializerCache getStoreDeserializerCache() {
374392
return storeDeserializerCache;
375393
}
394+
395+
public String getStoreName() {
396+
return storeName;
397+
}
376398
}

internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestDeferredVersionSwap.java

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
11
package com.linkedin.venice.endToEnd;
22

3+
import static com.linkedin.venice.ConfigKeys.CLIENT_SYSTEM_STORE_REPOSITORY_REFRESH_INTERVAL_SECONDS;
34
import static com.linkedin.venice.ConfigKeys.CONTROLLER_DEFERRED_VERSION_SWAP_SERVICE_ENABLED;
45
import static com.linkedin.venice.ConfigKeys.CONTROLLER_DEFERRED_VERSION_SWAP_SLEEP_MS;
6+
import static com.linkedin.venice.ConfigKeys.DATA_BASE_PATH;
7+
import static com.linkedin.venice.ConfigKeys.LOCAL_REGION_NAME;
58
import static com.linkedin.venice.utils.IntegrationTestPushUtils.createStoreForJob;
69
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V3_SCHEMA;
710
import static com.linkedin.venice.utils.TestWriteUtils.getTempDataDirectory;
811
import static com.linkedin.venice.vpj.VenicePushJobConstants.TARGETED_REGION_PUSH_WITH_DEFERRED_SWAP;
12+
import static org.testng.Assert.assertNotNull;
13+
import static org.testng.Assert.assertNull;
914

15+
import com.linkedin.davinci.client.DaVinciClient;
16+
import com.linkedin.davinci.client.DaVinciConfig;
1017
import com.linkedin.venice.controllerapi.ControllerClient;
1118
import com.linkedin.venice.controllerapi.UpdateStoreQueryParams;
19+
import com.linkedin.venice.integration.utils.DaVinciTestContext;
1220
import com.linkedin.venice.integration.utils.ServiceFactory;
21+
import com.linkedin.venice.integration.utils.VeniceClusterWrapper;
22+
import com.linkedin.venice.integration.utils.VeniceMultiClusterWrapper;
1323
import com.linkedin.venice.integration.utils.VeniceMultiRegionClusterCreateOptions;
1424
import com.linkedin.venice.integration.utils.VeniceTwoLayerMultiRegionMultiClusterWrapper;
1525
import com.linkedin.venice.meta.StoreInfo;
@@ -19,12 +29,16 @@
1929
import com.linkedin.venice.utils.TestUtils;
2030
import com.linkedin.venice.utils.TestWriteUtils;
2131
import com.linkedin.venice.utils.Utils;
32+
import com.linkedin.venice.utils.VeniceProperties;
2233
import java.io.File;
2334
import java.io.IOException;
35+
import java.util.List;
2436
import java.util.Map;
2537
import java.util.Properties;
2638
import java.util.concurrent.TimeUnit;
2739
import java.util.stream.IntStream;
40+
import org.apache.logging.log4j.LogManager;
41+
import org.apache.logging.log4j.Logger;
2842
import org.testng.Assert;
2943
import org.testng.annotations.AfterClass;
3044
import org.testng.annotations.BeforeClass;
@@ -40,6 +54,9 @@ public class TestDeferredVersionSwap {
4054
private static final String[] CLUSTER_NAMES =
4155
IntStream.range(0, NUMBER_OF_CLUSTERS).mapToObj(i -> "venice-cluster" + i).toArray(String[]::new);
4256

57+
private static final int TEST_TIMEOUT = 120_000;
58+
private static final Logger LOGGER = LogManager.getLogger(TestDeferredVersionSwap.class);
59+
4360
@BeforeClass
4461
public void setUp() {
4562
Properties controllerProps = new Properties();
@@ -68,7 +85,7 @@ public void cleanUp() {
6885
Utils.closeQuietlyWithErrorLogged(multiRegionMultiClusterWrapper);
6986
}
7087

71-
@Test
88+
@Test(timeOut = TEST_TIMEOUT)
7289
public void testDeferredVersionSwap() throws IOException {
7390
File inputDir = getTempDataDirectory();
7491
TestWriteUtils.writeSimpleAvroFileWithStringToV3Schema(inputDir, 100, 100);
@@ -131,4 +148,140 @@ public void testDeferredVersionSwap() throws IOException {
131148
}
132149
}
133150

151+
@Test(timeOut = TEST_TIMEOUT * 2)
152+
public void testDvcDelayedIngestionWithTargetRegion() throws Exception {
153+
// Setup job properties
154+
UpdateStoreQueryParams storeParms = new UpdateStoreQueryParams().setUnusedSchemaDeletionEnabled(true);
155+
storeParms.setTargetRegionSwapWaitTime(1);
156+
storeParms.setTargetRegionSwap(TARGET_REGION);
157+
String parentControllerURLs = multiRegionMultiClusterWrapper.getControllerConnectString();
158+
String keySchemaStr = "\"int\"";
159+
String valueSchemaStr = "\"int\"";
160+
161+
// Create store + start a normal push
162+
int keyCount = 100;
163+
File inputDir = getTempDataDirectory();
164+
TestWriteUtils.writeSimpleAvroFileWithIntToIntSchema(inputDir, keyCount);
165+
String inputDirPath = "file://" + inputDir.getAbsolutePath();
166+
String storeName = Utils.getUniqueString("testDvcDelayedIngestionWithTargetRegion");
167+
Properties props =
168+
IntegrationTestPushUtils.defaultVPJProps(multiRegionMultiClusterWrapper, inputDirPath, storeName);
169+
try (ControllerClient parentControllerClient = new ControllerClient(CLUSTER_NAMES[0], parentControllerURLs)) {
170+
createStoreForJob(CLUSTER_NAMES[0], keySchemaStr, valueSchemaStr, props, storeParms).close();
171+
LOGGER.info("DvcDeferredVersionSwap starting normal push job");
172+
TestWriteUtils.runPushJob("Test push job", props);
173+
TestUtils.waitForNonDeterministicPushCompletion(
174+
Version.composeKafkaTopic(storeName, 1),
175+
parentControllerClient,
176+
30,
177+
TimeUnit.SECONDS);
178+
179+
// Version should only be swapped in all regions
180+
TestUtils.waitForNonDeterministicAssertion(1, TimeUnit.MINUTES, () -> {
181+
Map<String, Integer> coloVersions =
182+
parentControllerClient.getStore(storeName).getStore().getColoToCurrentVersions();
183+
184+
coloVersions.forEach((colo, version) -> {
185+
Assert.assertEquals((int) version, 1);
186+
});
187+
});
188+
}
189+
190+
// Create dvc client in target region
191+
List<VeniceMultiClusterWrapper> childDatacenters = multiRegionMultiClusterWrapper.getChildRegions();
192+
VeniceClusterWrapper cluster1 = childDatacenters.get(0).getClusters().get(CLUSTER_NAMES[0]);
193+
VeniceProperties backendConfig = DaVinciTestContext.getDaVinciPropertyBuilder(cluster1.getZk().getAddress())
194+
.put(DATA_BASE_PATH, Utils.getTempDataDirectory().getAbsolutePath())
195+
.put(LOCAL_REGION_NAME, TARGET_REGION)
196+
.put(CLIENT_SYSTEM_STORE_REPOSITORY_REFRESH_INTERVAL_SECONDS, 1)
197+
.build();
198+
DaVinciClient<Object, Object> client1 =
199+
ServiceFactory.getGenericAvroDaVinciClient(storeName, cluster1, new DaVinciConfig(), backendConfig);
200+
client1.subscribeAll().get();
201+
202+
// Check that v1 is ingested
203+
for (int i = 1; i <= keyCount; i++) {
204+
assertNotNull(client1.get(i).get());
205+
}
206+
207+
// Do another push with target region enabled
208+
int keyCount2 = 200;
209+
File inputDir2 = getTempDataDirectory();
210+
String inputDirPath2 = "file://" + inputDir2.getAbsolutePath();
211+
TestWriteUtils.writeSimpleAvroFileWithIntToIntSchema(inputDir2, keyCount2);
212+
Properties props2 =
213+
IntegrationTestPushUtils.defaultVPJProps(multiRegionMultiClusterWrapper, inputDirPath2, storeName);
214+
try (ControllerClient parentControllerClient = new ControllerClient(CLUSTER_NAMES[0], parentControllerURLs)) {
215+
props2.put(TARGETED_REGION_PUSH_WITH_DEFERRED_SWAP, true);
216+
LOGGER.info("DvcDeferredVersionSwap starting target region push job");
217+
TestWriteUtils.runPushJob("Test push job", props2);
218+
TestUtils.waitForNonDeterministicPushCompletion(
219+
Version.composeKafkaTopic(storeName, 2),
220+
parentControllerClient,
221+
30,
222+
TimeUnit.SECONDS);
223+
224+
// Version should only be swapped in the target region
225+
TestUtils.waitForNonDeterministicAssertion(1, TimeUnit.MINUTES, () -> {
226+
Map<String, Integer> coloVersions =
227+
parentControllerClient.getStore(storeName).getStore().getColoToCurrentVersions();
228+
229+
coloVersions.forEach((colo, version) -> {
230+
if (colo.equals(TARGET_REGION)) {
231+
Assert.assertEquals((int) version, 2);
232+
} else {
233+
Assert.assertEquals((int) version, 1);
234+
}
235+
});
236+
});
237+
238+
// Data should be automatically ingested in target region for dvc
239+
TestUtils.waitForNonDeterministicAssertion(30, TimeUnit.SECONDS, () -> {
240+
for (int i = 101; i <= keyCount2; i++) {
241+
assertNotNull(client1.get(i).get());
242+
}
243+
});
244+
245+
// Close dvc client in target region
246+
client1.close();
247+
248+
// Create dvc client in non target region
249+
VeniceClusterWrapper cluster2 = childDatacenters.get(1).getClusters().get(CLUSTER_NAMES[0]);
250+
VeniceProperties backendConfig2 = DaVinciTestContext.getDaVinciPropertyBuilder(cluster2.getZk().getAddress())
251+
.put(DATA_BASE_PATH, Utils.getTempDataDirectory().getAbsolutePath())
252+
.put(LOCAL_REGION_NAME, "dc-1")
253+
.put(CLIENT_SYSTEM_STORE_REPOSITORY_REFRESH_INTERVAL_SECONDS, 1)
254+
.build();
255+
DaVinciClient<Object, Object> client2 =
256+
ServiceFactory.getGenericAvroDaVinciClient(storeName, cluster2, new DaVinciConfig(), backendConfig2);
257+
client2.subscribeAll().get();
258+
259+
// Check that v2 is not ingested
260+
TestUtils.waitForNonDeterministicAssertion(30, TimeUnit.SECONDS, () -> {
261+
for (int i = 101; i <= keyCount2; i++) {
262+
assertNull(client2.get(i).get());
263+
}
264+
});
265+
266+
// Version should be swapped in all regions
267+
LOGGER.info("DvcDeferredVersionSwap check that target region push is complete");
268+
TestUtils.waitForNonDeterministicAssertion(1, TimeUnit.MINUTES, () -> {
269+
Map<String, Integer> coloVersions =
270+
parentControllerClient.getStore(storeName).getStore().getColoToCurrentVersions();
271+
272+
coloVersions.forEach((colo, version) -> {
273+
Assert.assertEquals((int) version, 2);
274+
});
275+
});
276+
277+
// Check that v2 is ingested in dvc non target region
278+
TestUtils.waitForNonDeterministicAssertion(30, TimeUnit.SECONDS, () -> {
279+
for (int i = 101; i <= keyCount2; i++) {
280+
assertNotNull(client2.get(i).get());
281+
}
282+
});
283+
284+
client2.close();
285+
}
286+
}
134287
}

services/venice-controller/src/main/java/com/linkedin/venice/controller/DeferredVersionSwapService.java

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,8 @@
66
import com.github.benmanes.caffeine.cache.Cache;
77
import com.github.benmanes.caffeine.cache.Caffeine;
88
import com.linkedin.venice.controller.stats.DeferredVersionSwapStats;
9-
import com.linkedin.venice.controllerapi.ControllerClient;
10-
import com.linkedin.venice.controllerapi.StoreResponse;
119
import com.linkedin.venice.meta.ReadWriteStoreRepository;
1210
import com.linkedin.venice.meta.Store;
13-
import com.linkedin.venice.meta.StoreInfo;
1411
import com.linkedin.venice.meta.Version;
1512
import com.linkedin.venice.meta.VersionStatus;
1613
import com.linkedin.venice.pushmonitor.ExecutionStatus;
@@ -21,7 +18,6 @@
2118
import java.util.HashSet;
2219
import java.util.List;
2320
import java.util.Map;
24-
import java.util.Optional;
2521
import java.util.Set;
2622
import java.util.concurrent.Executors;
2723
import java.util.concurrent.ScheduledExecutorService;
@@ -79,17 +75,6 @@ private Set<String> getRegionsForVersionSwap(Map<String, Integer> candidateRegio
7975
return remainingRegions;
8076
}
8177

82-
private String getTargetRegion(Set<String> targetRegions) {
83-
return targetRegions.iterator().next();
84-
}
85-
86-
private StoreResponse getStoreForRegion(String clusterName, String targetRegion, String storeName) {
87-
Map<String, ControllerClient> controllerClientMap =
88-
veniceParentHelixAdmin.getVeniceHelixAdmin().getControllerClientMap(clusterName);
89-
ControllerClient targetRegionControllerClient = controllerClientMap.get(targetRegion);
90-
return targetRegionControllerClient.getStore(storeName);
91-
}
92-
9378
private boolean didWaitTimeElapseInTargetRegions(
9479
Map<String, Long> completionTimes,
9580
Set<String> targetRegions,
@@ -146,7 +131,7 @@ public void run() {
146131
storePushCompletionTimes,
147132
targetRegions,
148133
store.getTargetSwapRegionWaitTime())) {
149-
LOGGER.info(
134+
LOGGER.debug(
150135
"Skipping version swap for store: {} on version: {} as wait time: {} has not passed",
151136
storeName,
152137
targetVersionNum,
@@ -159,34 +144,6 @@ public void run() {
159144
veniceParentHelixAdmin.getCurrentVersionsForMultiColos(cluster, storeName);
160145
Set<String> remainingRegions = getRegionsForVersionSwap(coloToVersions, targetRegions);
161146

162-
StoreResponse targetRegionStoreResponse =
163-
getStoreForRegion(cluster, getTargetRegion(targetRegions), storeName);
164-
if (targetRegionStoreResponse.isError()) {
165-
LOGGER.warn("Got error when fetching targetRegionStore: {}", targetRegionStoreResponse.getError());
166-
continue;
167-
}
168-
169-
StoreInfo targetRegionStore = targetRegionStoreResponse.getStore();
170-
Optional<Version> version = targetRegionStore.getVersion(targetVersionNum);
171-
if (!version.isPresent()) {
172-
LOGGER.warn(
173-
"Unable to find version {} for store: {} in regions: {}",
174-
targetVersionNum,
175-
storeName,
176-
store.getTargetSwapRegion());
177-
continue;
178-
}
179-
180-
// Do not perform version swap for davinci stores
181-
// TODO remove this check once DVC delayed ingestion is completed
182-
if (version.get().getIsDavinciHeartbeatReported()) {
183-
LOGGER.info(
184-
"Skipping version swap for store: {} on version: {} as it is davinci",
185-
storeName,
186-
targetVersionNum);
187-
continue;
188-
}
189-
190147
// Check that push is completed in target regions
191148
Admin.OfflinePushStatusInfo pushStatusInfo =
192149
veniceParentHelixAdmin.getOffLinePushStatus(cluster, kafkaTopicName);
@@ -269,7 +226,7 @@ public void run() {
269226
store.getTargetSwapRegionWaitTime());
270227

271228
if (!didWaitTimeElapseInTargetRegions) {
272-
LOGGER.info(
229+
LOGGER.debug(
273230
"Skipping version swap for store: {} on version: {} as wait time: {} has not passed",
274231
storeName,
275232
targetVersionNum,

0 commit comments

Comments
 (0)