Skip to content

Commit 4667171

Browse files
Jingyan Liclaude
andcommitted
[controller] Use terminal ERROR for never-created version in /job
Address review: NOT_CREATED is non-terminal (isTerminal=false), so VPJ would keep polling rather than exiting its loop. Use the terminal ERROR status for the versionNum > largestUsedVersionNumber branch — a version that was never created is a genuine inconsistency, not a transient absence. Keep ARCHIVED for the retired branch (versionNum <= largestUsedVersionNumber) and differentiate the two cases via distinct status-details strings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 698d522 commit 4667171

2 files changed

Lines changed: 20 additions & 12 deletions

File tree

services/venice-controller/src/main/java/com/linkedin/venice/controller/VeniceParentHelixAdmin.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4517,23 +4517,30 @@ private OfflinePushStatusInfo getOffLineJobStatus(
45174517
Version version = parentStore.getVersion(versionNum);
45184518

45194519
// Version may be absent between two consecutive VPJ polls (retention eviction, supersession,
4520-
// controller leadership change, or manual cleanup). The protocol already defines terminal
4521-
// values for this state — return one with HTTP 200 so VPJ exits its poll loop, rather than
4522-
// letting an unguarded deref below escape as HTTP 500.
4520+
// controller leadership change, or manual cleanup). Return a terminal status with HTTP 200 so
4521+
// VPJ exits its poll loop, rather than letting an unguarded deref below escape as HTTP 500.
4522+
// Both branches must be terminal (NOT_CREATED is non-terminal, so VPJ would keep polling):
4523+
// - versionNum <= largestUsedVersionNumber: the version existed once and was retired -> ARCHIVED.
4524+
// - versionNum > largestUsedVersionNumber: a version that was never created is being polled,
4525+
// which is a genuine inconsistency rather than a transient absence -> ERROR.
45234526
if (version == null) {
4524-
ExecutionStatus terminalStatus = versionNum <= parentStore.getLargestUsedVersionNumber()
4525-
? ExecutionStatus.ARCHIVED
4526-
: ExecutionStatus.NOT_CREATED;
4527+
boolean wasRetired = versionNum <= parentStore.getLargestUsedVersionNumber();
4528+
ExecutionStatus terminalStatus = wasRetired ? ExecutionStatus.ARCHIVED : ExecutionStatus.ERROR;
4529+
String statusDetails = wasRetired
4530+
? "Parent version " + versionNum + " was retired and is no longer present in store metadata"
4531+
: "Parent version " + versionNum + " was never created (largest used version number is "
4532+
+ parentStore.getLargestUsedVersionNumber() + ")";
45274533
LOGGER.info(
4528-
"Version {} for store {} no longer present in parent store metadata; returning {}",
4534+
"Version {} for store {} not present in parent store metadata; returning {}: {}",
45294535
versionNum,
45304536
storeName,
4531-
terminalStatus);
4537+
terminalStatus,
4538+
statusDetails);
45324539
return new OfflinePushStatusInfo(
45334540
terminalStatus,
45344541
null,
45354542
extraInfo,
4536-
"Parent version " + versionNum + " no longer present in store metadata",
4543+
statusDetails,
45374544
extraDetails,
45384545
extraInfoUpdateTimestamp);
45394546
}

services/venice-controller/src/test/java/com/linkedin/venice/controller/TestVeniceParentHelixAdmin.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,9 +2262,10 @@ public void testAbsentVersionReturnsArchived() {
22622262
}
22632263

22642264
@Test
2265-
public void testAbsentVersionReturnsNotCreated() {
2265+
public void testAbsentVersionReturnsError() {
22662266
// Version is null and versionNum > largestUsedVersionNumber, meaning the requested version
2267-
// was never created. Expect NOT_CREATED instead of NPE/500.
2267+
// was never created. This is a genuine inconsistency, so expect the terminal ERROR status
2268+
// (not the non-terminal NOT_CREATED, which would leave VPJ polling) instead of NPE/500.
22682269
Map<ExecutionStatus, ControllerClient> clientMap = getMockJobStatusQueryClient();
22692270
Map<String, ControllerClient> controllerClients = new HashMap<>();
22702271
controllerClients.put("cluster", clientMap.get(ExecutionStatus.STARTED));
@@ -2284,7 +2285,7 @@ public void testAbsentVersionReturnsNotCreated() {
22842285

22852286
Admin.OfflinePushStatusInfo offlineJobStatus =
22862287
parentAdmin.getOffLineJobStatus("IGNORED", "topic1_v9", controllerClients);
2287-
assertEquals(offlineJobStatus.getExecutionStatus(), ExecutionStatus.NOT_CREATED);
2288+
assertEquals(offlineJobStatus.getExecutionStatus(), ExecutionStatus.ERROR);
22882289
}
22892290

22902291
@Test

0 commit comments

Comments
 (0)