Skip to content

Commit

Permalink
revert and cleanup the incoming request/key count changes
Browse files Browse the repository at this point in the history
  • Loading branch information
m-nagarajan committed Feb 11, 2025
1 parent ced945c commit 4e86995
Show file tree
Hide file tree
Showing 10 changed files with 32 additions and 171 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ public enum VeniceMetricsDimensions {
/** {@link HttpResponseStatusCodeCategory} ie. 1xx, 2xx, etc */
HTTP_RESPONSE_STATUS_CODE_CATEGORY("http.response.status_code_category"),

/** {@link RequestValidationOutcome#outcome} */
VENICE_REQUEST_VALIDATION_OUTCOME("venice.request.validation_outcome"),

/** {@link VeniceResponseStatusCategory} */
VENICE_RESPONSE_STATUS_CODE_CATEGORY("venice.response.status_code_category"),

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ public void testGetDimensionNameInSnakeCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "http.response.status_code_category");
break;
case VENICE_REQUEST_VALIDATION_OUTCOME:
assertEquals(dimension.getDimensionName(format), "venice.request.validation_outcome");
break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "venice.response.status_code_category");
break;
Expand Down Expand Up @@ -66,9 +63,6 @@ public void testGetDimensionNameInCamelCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "http.response.statusCodeCategory");
break;
case VENICE_REQUEST_VALIDATION_OUTCOME:
assertEquals(dimension.getDimensionName(format), "venice.request.validationOutcome");
break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "venice.response.statusCodeCategory");
break;
Expand Down Expand Up @@ -104,9 +98,6 @@ public void testGetDimensionNameInPascalCase() {
case HTTP_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCodeCategory");
break;
case VENICE_REQUEST_VALIDATION_OUTCOME:
assertEquals(dimension.getDimensionName(format), "Venice.Request.ValidationOutcome");
break;
case VENICE_RESPONSE_STATUS_CODE_CATEGORY:
assertEquals(dimension.getDimensionName(format), "Venice.Response.StatusCodeCategory");
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ public VenicePath parseResourceUri(String uri, BasicFullHttpRequest fullHttpRequ
routerStats.getStatsByType(keyCountLimitException.getRequestType())
.recordBadRequestKeyCount(
keyCountLimitException.getStoreName(),
responseStatus,
keyCountLimitException.getRequestKeyCount());
}
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import com.linkedin.venice.read.RequestType;
import com.linkedin.venice.stats.AbstractVeniceAggStats;
import com.linkedin.venice.stats.AbstractVeniceAggStoreStats;
import com.linkedin.venice.stats.dimensions.RequestValidationOutcome;
import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap;
import io.netty.handler.codec.http.HttpResponseStatus;
import io.tehuti.metrics.MetricsRepository;
Expand Down Expand Up @@ -149,15 +148,10 @@ public void recordBadRequest(String storeName, HttpResponseStatus responseStatus
}
}

public void recordBadRequestKeyCount(String storeName, int keyNum) {
totalStats
.recordIncomingBadRequestKeyCountMetric(keyNum, RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED);
public void recordBadRequestKeyCount(String storeName, HttpResponseStatus responseStatus, int keyNum) {
totalStats.recordIncomingBadRequestKeyCountMetric(responseStatus, keyNum);
if (storeName != null) {
recordStoreStats(
storeName,
stats -> stats.recordIncomingBadRequestKeyCountMetric(
keyNum,
RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED));
recordStoreStats(storeName, stats -> stats.recordIncomingBadRequestKeyCountMetric(responseStatus, keyNum));
}
}

Expand Down Expand Up @@ -271,8 +265,8 @@ public long getTotalRetriesError() {
}

public void recordKeyNum(String storeName, int keyNum) {
totalStats.recordIncomingKeyCountMetric(keyNum, RequestValidationOutcome.VALID);
recordStoreStats(storeName, stats -> stats.recordIncomingKeyCountMetric(keyNum, RequestValidationOutcome.VALID));
totalStats.recordIncomingKeyCountMetric(keyNum);
recordStoreStats(storeName, stats -> stats.recordIncomingKeyCountMetric(keyNum));
}

public void recordRequestUsage(String storeName, int usage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_TIME;
import static com.linkedin.venice.router.stats.RouterMetricEntity.DISALLOWED_RETRY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.INCOMING_CALL_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.INCOMING_KEY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.KEY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_COUNT;
import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_DELAY;
Expand All @@ -24,7 +22,6 @@
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME;
import static java.util.Collections.singletonList;
Expand All @@ -41,7 +38,6 @@
import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository;
import com.linkedin.venice.stats.dimensions.RequestRetryAbortReason;
import com.linkedin.venice.stats.dimensions.RequestRetryType;
import com.linkedin.venice.stats.dimensions.RequestValidationOutcome;
import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions;
import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory;
import com.linkedin.venice.stats.metrics.MetricEntityState;
Expand Down Expand Up @@ -82,7 +78,7 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats {
}

/** metrics to track incoming requests */
private final MetricEntityState incomingRequestMetric;
private final Sensor requestSensor;

/** metrics to track response handling */
private final MetricEntityState healthyRequestMetric;
Expand Down Expand Up @@ -114,9 +110,9 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats {
private final MetricEntityState noAvailableReplicaAbortedRetryCountMetric;

/** key count metrics */
private final MetricEntityState incomingKeyCountMetric;
private final MetricEntityState incomingBadRequestKeyCountMetric;
private final MetricEntityState keyCountMetric;
private final Sensor keyNumSensor;
private final Sensor badRequestKeyCountSensor;

/** OTel metrics yet to be added */
private final Sensor requestSizeSensor;
Expand Down Expand Up @@ -189,18 +185,14 @@ public RouterHttpRequestStats(
Rate requestRate = new OccurrenceRate();
Rate healthyRequestRate = new OccurrenceRate();
Rate tardyRequestRate = new OccurrenceRate();

incomingRequestMetric = new MetricEntityState(
INCOMING_CALL_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
RouterTehutiMetricNameEnum.REQUEST,
Arrays.asList(new Count(), requestRate));
requestSensor = registerSensor("request", new Count(), requestRate);

healthyRequestRateSensor =
registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio"));
tardyRequestRatioSensor =
registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio"));
keyNumSensor = registerSensor("key_num", new Avg(), new Max(0));
badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max());

healthyRequestMetric = new MetricEntityState(
CALL_COUNT.getMetricEntity(),
Expand Down Expand Up @@ -327,20 +319,6 @@ public RouterHttpRequestStats(
RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST,
singletonList(new Count()));

incomingKeyCountMetric = new MetricEntityState(
INCOMING_KEY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
RouterTehutiMetricNameEnum.KEY_NUM,
Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0)));

incomingBadRequestKeyCountMetric = new MetricEntityState(
INCOMING_KEY_COUNT.getMetricEntity(),
otelRepository,
this::registerSensorFinal,
RouterTehutiMetricNameEnum.BAD_REQUEST_KEY_COUNT,
Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0)));

keyCountMetric = new MetricEntityState(KEY_COUNT.getMetricEntity(), otelRepository);

errorRetryAttemptTriggeredByPendingRequestCheckSensor =
Expand Down Expand Up @@ -432,7 +410,7 @@ private String getDimensionName(VeniceMetricsDimensions dimension) {
* types of requests also have their latencies logged at the same time.
*/
public void recordIncomingRequest() {
incomingRequestMetric.record(1, commonMetricDimensions);
requestSensor.record(1);
inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet());
totalInflightRequestSensor.record();
}
Expand All @@ -453,7 +431,7 @@ Attributes getRequestMetricDimensions(
public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus, int keyNum) {
Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.SUCCESS);
healthyRequestMetric.record(1, dimensions);
recordKeyCountMetric(keyNum, dimensions);
keyCountMetric.record(keyNum, dimensions);
if (latency != null) {
healthyLatencyMetric.record(latency, dimensions);
}
Expand All @@ -467,7 +445,7 @@ public void recordUnhealthyRequest(HttpResponseStatus responseStatus) {
public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
unhealthyRequestMetric.record(1, dimensions);
recordKeyCountMetric(keyNum, dimensions);
keyCountMetric.record(keyNum, dimensions);
unhealthyLatencyMetric.record(latency, dimensions);
}

Expand All @@ -486,14 +464,14 @@ public void recordReadQuotaUsage(int quotaUsage) {
public void recordTardyRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.SUCCESS);
tardyRequestMetric.record(1, dimensions);
recordKeyCountMetric(keyNum, dimensions);
keyCountMetric.record(keyNum, dimensions);
tardyLatencyMetric.record(latency, dimensions);
}

public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus, int keyNum) {
Attributes dimensions = getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL);
throttledRequestMetric.record(1, dimensions);
recordKeyCountMetric(keyNum, dimensions);
keyCountMetric.record(keyNum, dimensions);
throttledLatencyMetric.record(latency, dimensions);
}

Expand Down Expand Up @@ -602,30 +580,13 @@ public void recordFindUnhealthyHostRequest() {
findUnhealthyHostRequestSensor.record();
}

public void recordIncomingKeyCountMetric(int keyNum, RequestValidationOutcome outcome) {
Attributes dimensions = null;
if (emitOpenTelemetryMetrics) {
dimensions = Attributes.builder()
.putAll(commonMetricDimensions)
.put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome())
.build();
}
incomingKeyCountMetric.record(keyNum, dimensions);
}

public void recordIncomingBadRequestKeyCountMetric(int keyNum, RequestValidationOutcome outcome) {
Attributes dimensions = null;
if (emitOpenTelemetryMetrics) {
dimensions = Attributes.builder()
.putAll(commonMetricDimensions)
.put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome())
.build();
}
incomingBadRequestKeyCountMetric.record(keyNum, dimensions);
public void recordIncomingKeyCountMetric(int keyNum) {
keyNumSensor.record(keyNum);
}

public void recordKeyCountMetric(int keyNum, Attributes dimensions) {
keyCountMetric.record(keyNum, dimensions);
public void recordIncomingBadRequestKeyCountMetric(HttpResponseStatus responseStatus, int keyNum) {
badRequestKeyCountSensor.record(keyNum);
keyCountMetric.record(keyNum, getRequestMetricDimensions(responseStatus, VeniceResponseStatusCategory.FAIL));
}

public void recordRequestUsage(int usage) {
Expand Down Expand Up @@ -721,8 +682,6 @@ Attributes getCommonMetricDimensions() {
* Metric names for tehuti metrics used in this class
*/
enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum {
/** for {@link RouterMetricEntity#INCOMING_CALL_COUNT} */
REQUEST,
/** for {@link RouterMetricEntity#CALL_COUNT} */
HEALTHY_REQUEST, UNHEALTHY_REQUEST, TARDY_REQUEST, THROTTLED_REQUEST, BAD_REQUEST,
/** for {@link RouterMetricEntity#CALL_TIME} */
Expand All @@ -737,9 +696,7 @@ enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum {
RETRY_DELAY,
/** for {@link RouterMetricEntity#ABORTED_RETRY_COUNT} */
DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, SLOW_ROUTE_ABORTED_RETRY_REQUEST, RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST,
NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST,
/** for {@link RouterMetricEntity#INCOMING_KEY_COUNT} */
KEY_NUM, BAD_REQUEST_KEY_COUNT;
NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST;

private final String metricName;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY;
import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME;
import static com.linkedin.venice.utils.Utils.setOf;
Expand All @@ -23,19 +22,10 @@
*/
public enum RouterMetricEntity {
/**
* Count of all incoming requests in the request handling path
*/
INCOMING_CALL_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER, "Count of all incoming requests in the request handling path",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)
),
/**
* Count of all requests in the response handling path along with response codes.
* This vs {@link #INCOMING_CALL_COUNT} will help to correlate the incoming vs properly handled requests.
* Count of all requests during response handling along with response codes
*/
CALL_COUNT(
MetricType.COUNTER, MetricUnit.NUMBER,
"Count of all requests in the response handling path along with response codes",
MetricType.COUNTER, MetricUnit.NUMBER, "Count of all requests during response handling along with response codes",
setOf(
VENICE_STORE_NAME,
VENICE_CLUSTER_NAME,
Expand All @@ -58,18 +48,10 @@ public enum RouterMetricEntity {
VENICE_RESPONSE_STATUS_CODE_CATEGORY)
),
/**
* Count of keys in incoming requests in the request handling path
*/
INCOMING_KEY_COUNT(
MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.NUMBER,
"Count of keys in incoming requests in the request handling path",
setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME)
),
/**
* Count of keys in the response handling path along with response codes.
* Count of keys during response handling along with response codes
*/
KEY_COUNT(
MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys in the response handling path along with response codes",
MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys during response handling along with response codes",
setOf(
VENICE_STORE_NAME,
VENICE_CLUSTER_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import io.netty.handler.codec.http.EmptyHttpHeaders;
import io.netty.handler.codec.http.HttpHeaders;
import io.netty.handler.codec.http.HttpMethod;
import io.netty.handler.codec.http.HttpResponseStatus;
import io.netty.handler.codec.http.HttpVersion;
import java.nio.ByteBuffer;
import java.util.ArrayList;
Expand Down Expand Up @@ -306,7 +307,8 @@ public void parseRequestWithBatchSizeViolation() throws RouterException {
fail("A RouterException should be thrown here");
} catch (RouterException e) {
// expected and validate bad request metric
verify(multiGetStats, times(1)).recordBadRequestKeyCount(storeName, maxKeyCount + 1);
verify(multiGetStats, times(1))
.recordBadRequestKeyCount(storeName, HttpResponseStatus.REQUEST_ENTITY_TOO_LARGE, maxKeyCount + 1);
} catch (Throwable t) {
t.printStackTrace();
fail("Only RouterException is expected, but got: " + t.getClass());
Expand Down
Loading

0 comments on commit 4e86995

Please sign in to comment.