|
13 | 13 | import com.linkedin.venice.utils.RedundantExceptionFilter;
|
14 | 14 | import com.linkedin.venice.utils.Utils;
|
15 | 15 | import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap;
|
| 16 | +import com.linkedin.venice.utils.lazy.Lazy; |
16 | 17 | import io.tehuti.metrics.MetricConfig;
|
17 | 18 | import io.tehuti.metrics.stats.Rate;
|
18 | 19 | import java.util.HashMap;
|
@@ -66,6 +67,8 @@ class ConsumptionTask implements Runnable {
|
66 | 67 | private final Map<PubSubTopicPartition, Rate> messageRatePerTopicPartition = new VeniceConcurrentHashMap<>();
|
67 | 68 | private final Map<PubSubTopicPartition, Rate> bytesRatePerTopicPartition = new VeniceConcurrentHashMap<>();
|
68 | 69 | private final Map<PubSubTopicPartition, Rate> pollRatePerTopicPartition = new VeniceConcurrentHashMap<>();
|
| 70 | + |
| 71 | + private final Lazy<Rate> overallConsumerPollRate; |
69 | 72 | private final RedundantExceptionFilter redundantExceptionFilter;
|
70 | 73 | private final Map<PubSubTopicPartition, Long> lastSuccessfulPollTimestampPerTopicPartition =
|
71 | 74 | new VeniceConcurrentHashMap<>();
|
@@ -105,6 +108,7 @@ public ConsumptionTask(
|
105 | 108 | this.taskId = taskId;
|
106 | 109 | this.offsetLagGetter = offsetLagGetter;
|
107 | 110 | this.redundantExceptionFilter = redundantExceptionFilter;
|
| 111 | + this.overallConsumerPollRate = Lazy.of(() -> createRate(System.currentTimeMillis())); |
108 | 112 | this.consumptionTaskIdStr = Utils.getSanitizedStringForLogger(consumerNamePrefix) + " - " + taskId;
|
109 | 113 | this.LOGGER = LogManager.getLogger(getClass().getSimpleName() + "[ " + consumptionTaskIdStr + " ]");
|
110 | 114 | }
|
@@ -196,6 +200,7 @@ public void run() {
|
196 | 200 | consumedDataReceiver.write(topicPartitionMessages);
|
197 | 201 | checkSlowPartitionWithHighLag(pubSubTopicPartition);
|
198 | 202 | }
|
| 203 | + overallConsumerPollRate.get().record(1, lastSuccessfulPollTimestamp); |
199 | 204 | aggStats.recordTotalConsumerRecordsProducingToWriterBufferLatency(
|
200 | 205 | LatencyUtils.getElapsedTimeFromMsToMs(beforeProducingToWriteBufferTimestamp));
|
201 | 206 | aggStats.recordTotalNonZeroPollResultNum(polledPubSubMessagesCount);
|
@@ -302,15 +307,17 @@ private void checkSlowPartitionWithHighLag(PubSubTopicPartition pubSubTopicParti
|
302 | 307 | Long offsetLag = offsetLagGetter.apply(pubSubTopicPartition);
|
303 | 308 | Double messageRate = getMessageRate(pubSubTopicPartition);
|
304 | 309 | Double pollRate = getPollRate(pubSubTopicPartition);
|
| 310 | + Double consumerPollRate = overallConsumerPollRate.get().measure(metricConfig, System.currentTimeMillis()); |
305 | 311 | String slowTaskWithPartitionStr = consumptionTaskIdStr + " - " + pubSubTopicPartition;
|
306 | 312 | if (offsetLag > 200000 && messageRate < 200
|
307 | 313 | && !redundantExceptionFilter.isRedundantException(slowTaskWithPartitionStr)) {
|
308 | 314 | LOGGER.warn(
|
309 |
| - "Slow partition with high lag detected: {}. Lag: {}, Message Rate: {}, Poll Rate: {}", |
| 315 | + "Slow partition with high lag detected: {}. Lag: {}, Message Rate: {}, Poll Rate: {},Consumer Poll Rate: {}", |
310 | 316 | pubSubTopicPartition,
|
311 | 317 | offsetLag,
|
312 | 318 | messageRate,
|
313 |
| - pollRate); |
| 319 | + pollRate, |
| 320 | + consumerPollRate); |
314 | 321 | }
|
315 | 322 | }
|
316 | 323 |
|
|
0 commit comments