Skip to content

Commit cc29f8f

Browse files
m-nagarajanclaude
andcommitted
[server][fc] Fix gRPC read stats recording successful reads as failures
GrpcOutboundStatsHandler chose between successRequest and errorRequest with: !ctx.hasError() && !responseStatus.equals(OK) || responseStatus.equals(NOT_FOUND) Java evaluates && before ||, so this parses as (!hasError && !status.equals(OK)) || status.equals(NOT_FOUND). A successful value-found read (no error, status OK) evaluates to false and falls into the else branch, recording the read via errorRequest(). That emits Read.CallCount{Http.Response.StatusCode=200, Venice.Response.StatusCodeCategory=fail} and increments the error_request sensor for every successful gRPC read. The same broken condition also recorded genuine 400/500 errors and 429 throttles as successes. Parenthesize the condition to mirror the Netty StatsHandler: record a success only when no handler flagged an error and the status is OK or NOT_FOUND; record TOO_MANY_REQUESTS as neither (a throttled request is not a server-side failure); record everything else as an error. Added GrpcOutboundStatsHandlerTest covering value-found -> success (regression guard), key-absent -> success, error-flagged-with-OK -> error, 400/500 -> error, and 429 -> neither. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 2ad367e commit cc29f8f

2 files changed

Lines changed: 85 additions & 3 deletions

File tree

services/venice-server/src/main/java/com/linkedin/venice/listener/grpc/handlers/GrpcOutboundStatsHandler.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,15 @@ public void processRequest(GrpcRequestContext ctx) {
3838

3939
double elapsedTime = LatencyUtils.getElapsedTimeFromNSToMS(statsContext.getRequestStartTimeInNS());
4040

41-
if (!ctx.hasError() && !responseStatus.equals(HttpResponseStatus.OK)
42-
|| responseStatus.equals(HttpResponseStatus.NOT_FOUND)) {
41+
/*
42+
* Record a success only when no handler flagged an error and the response is OK (value found) or NOT_FOUND
43+
* (key absent); otherwise record an error. TOO_MANY_REQUESTS is recorded as neither, since a throttled
44+
* request is not a server-side failure. Mirrors the Netty StatsHandler categorization.
45+
*/
46+
if (!ctx.hasError()
47+
&& (responseStatus.equals(HttpResponseStatus.OK) || responseStatus.equals(HttpResponseStatus.NOT_FOUND))) {
4348
statsContext.successRequest(serverHttpRequestStats, elapsedTime);
44-
} else {
49+
} else if (!responseStatus.equals(HttpResponseStatus.TOO_MANY_REQUESTS)) {
4550
statsContext.errorRequest(serverHttpRequestStats, elapsedTime);
4651
}
4752
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package com.linkedin.venice.listener.grpc.handlers;
2+
3+
import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
4+
import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
5+
import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
6+
import static io.netty.handler.codec.http.HttpResponseStatus.OK;
7+
import static io.netty.handler.codec.http.HttpResponseStatus.TOO_MANY_REQUESTS;
8+
import static org.mockito.ArgumentMatchers.anyDouble;
9+
import static org.mockito.ArgumentMatchers.eq;
10+
import static org.mockito.Mockito.mock;
11+
import static org.mockito.Mockito.never;
12+
import static org.mockito.Mockito.times;
13+
import static org.mockito.Mockito.verify;
14+
import static org.mockito.Mockito.when;
15+
16+
import com.linkedin.venice.listener.ServerStatsContext;
17+
import com.linkedin.venice.listener.grpc.GrpcRequestContext;
18+
import com.linkedin.venice.protocols.VeniceServerResponse;
19+
import com.linkedin.venice.stats.AggServerHttpRequestStats;
20+
import com.linkedin.venice.stats.ServerHttpRequestStats;
21+
import io.grpc.stub.StreamObserver;
22+
import io.netty.handler.codec.http.HttpResponseStatus;
23+
import org.testng.annotations.DataProvider;
24+
import org.testng.annotations.Test;
25+
26+
27+
public class GrpcOutboundStatsHandlerTest {
28+
private static final String STORE_NAME = "test_store";
29+
30+
/*
31+
* Each row is: the response status, whether a prior handler flagged an error, and the expected outcome -
32+
* TRUE = recorded as success, FALSE = recorded as error, null = recorded as neither (throttled).
33+
*/
34+
@DataProvider(name = "responseStatusCases", parallel = true)
35+
public Object[][] responseStatusCases() {
36+
return new Object[][] { { OK, false, Boolean.TRUE }, // value found -> success (regression guard for 200-as-fail)
37+
{ NOT_FOUND, false, Boolean.TRUE }, // key absent -> success
38+
{ OK, true, Boolean.FALSE }, // error flagged despite OK status -> error
39+
{ BAD_REQUEST, false, Boolean.FALSE }, // malformed request -> error
40+
{ INTERNAL_SERVER_ERROR, false, Boolean.FALSE }, // server failure -> error
41+
{ TOO_MANY_REQUESTS, false, null } }; // throttled -> neither success nor error
42+
}
43+
44+
@Test(dataProvider = "responseStatusCases")
45+
public void testRequestCategorization(HttpResponseStatus responseStatus, boolean hasError, Boolean expectSuccess) {
46+
ServerHttpRequestStats storeStats = mock(ServerHttpRequestStats.class);
47+
AggServerHttpRequestStats aggStats = mock(AggServerHttpRequestStats.class);
48+
when(aggStats.getStoreStats(STORE_NAME)).thenReturn(storeStats);
49+
50+
ServerStatsContext statsContext = mock(ServerStatsContext.class);
51+
when(statsContext.getResponseStatus()).thenReturn(responseStatus);
52+
when(statsContext.getStoreName()).thenReturn(STORE_NAME);
53+
when(statsContext.getCurrentStats()).thenReturn(aggStats);
54+
when(statsContext.getRequestStartTimeInNS()).thenReturn(0L);
55+
56+
@SuppressWarnings("unchecked")
57+
StreamObserver<VeniceServerResponse> responseObserver = mock(StreamObserver.class);
58+
GrpcRequestContext ctx = new GrpcRequestContext(null, VeniceServerResponse.newBuilder(), responseObserver);
59+
ctx.setGrpcStatsContext(statsContext);
60+
if (hasError) {
61+
ctx.setError();
62+
}
63+
64+
new GrpcOutboundStatsHandler().processRequest(ctx);
65+
66+
if (expectSuccess == null) {
67+
verify(statsContext, never()).successRequest(eq(storeStats), anyDouble());
68+
verify(statsContext, never()).errorRequest(eq(storeStats), anyDouble());
69+
} else if (expectSuccess) {
70+
verify(statsContext, times(1)).successRequest(eq(storeStats), anyDouble());
71+
verify(statsContext, never()).errorRequest(eq(storeStats), anyDouble());
72+
} else {
73+
verify(statsContext, times(1)).errorRequest(eq(storeStats), anyDouble());
74+
verify(statsContext, never()).successRequest(eq(storeStats), anyDouble());
75+
}
76+
}
77+
}

0 commit comments

Comments
 (0)