Skip to content

Commit a88522a

Browse files
authored
[producers][server][dvc] Dedupe DIV debug info (#700)
In some use cases, the debug info in servers could take a large amount of heap. This commit optimizes this by deduping the instances used in the various debug maps, since they are very repetitive, and by using a more compact map from the fastutil library, rather than a CHM. Miscellaneous: - Deleted the ArrayBasedPrimitiveIntegerSet, which is unused. - Added back the test permutation number to build logs. - Added two commits containing just big directory moves to git-blame-ignore-revs.
1 parent 70edf83 commit a88522a

File tree

8 files changed

+94
-224
lines changed

8 files changed

+94
-224
lines changed

Diff for: .git-blame-ignore-revs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
43b8eb5ca60b708c6bc86a6a0c92df2d657af2c1
22
690830eb1aae03017aa43627128b4860e60eeab1
33
bb6544918fc3105a834782eb3c170711d5ba206c
4+
63a7921da1c1814f9d856a6375f84ba392c01abb
5+
9bd0175869cd66376457bdecf9e00583cfb98301

Diff for: build.gradle

+2-12
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,6 @@ def parser = new XmlSlurper()
149149
parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false)
150150
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
151151

152-
// We remove square brackets from test names, which occur when using a DataProvider,
153-
// because occasionally the number in the brackets is non-deterministic (unknown why)
154-
// and when that occurs, the test-retry plugin gets confused and cannot match the
155-
// attempts together.
156-
def removeSquareBrackets(String testName) {
157-
return testName.replaceFirst('\\[[0-9]+\\]', '')
158-
}
159-
160152
configurations {
161153
alpnAgent {
162154
}
@@ -431,16 +423,14 @@ subprojects {
431423
}
432424

433425
beforeTest { descriptor ->
434-
def testName = removeSquareBrackets(descriptor.displayName)
435426
def out = services.get(StyledTextOutputFactory).create("an-ouput")
436427

437-
out.style(Style.Normal).println("$descriptor.className > $testName STARTED")
428+
out.style(Style.Normal).println("$descriptor.className > $descriptor.displayName STARTED")
438429
}
439430

440431
afterTest { descriptor, result ->
441432
def totalTime = result.endTime - result.startTime
442433
def prettyTime = totalTime < 1000 ? "$totalTime ms" : "${totalTime / 1000} s"
443-
def testName = removeSquareBrackets(descriptor.displayName)
444434
def out = services.get(StyledTextOutputFactory).create("an-ouput")
445435

446436
def style = result.resultType == TestResult.ResultType.SUCCESS
@@ -455,7 +445,7 @@ subprojects {
455445
? 'FAILED '
456446
: 'SKIPPED '
457447

458-
out.style(Style.Normal).text("$descriptor.className > $testName ")
448+
out.style(Style.Normal).text("$descriptor.className > $descriptor.displayName ")
459449
.style(style).text(status)
460450
.style(Style.Normal).println("($prettyTime)")
461451

Diff for: clients/da-vinci-client/src/main/java/com/linkedin/davinci/validation/PartitionTracker.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,10 @@ private void updateOffsetRecord(GUID guid, Segment segment, OffsetRecord offsetR
163163
* that). It is redundant that we store the same debug values once per partition. In the future,
164164
* if we want to eliminate this redundancy, we could move the per-producer debug info to another
165165
* data structure, though that would increase bookkeeping complexity. This is expected to be a
166-
* minor overhead, and therefore it appears to be a premature to optimize this now.
166+
* minor overhead, and therefore it appears to be premature to optimize this now.
167167
*/
168-
state.aggregates = segment.getAggregates();
169-
state.debugInfo = segment.getDebugInfo();
168+
state.aggregates = CollectionUtils.substituteEmptyMap(segment.getAggregates());
169+
state.debugInfo = CollectionUtils.substituteEmptyMap(segment.getDebugInfo());
170170
}
171171
state.checksumType = segment.getCheckSumType().getValue();
172172
/**

Diff for: gradle/spotbugs/exclude.xml

-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,6 @@
251251
<Class name="com.linkedin.venice.client.store.schemas.TestValueRecordWithMoreFields"/>
252252
<Class name="com.linkedin.venice.fastclient.schema.TestValueSchema"/>
253253
<Class name="com.linkedin.venice.utils.TestMockTime"/>
254-
<Class name="com.linkedin.venice.serializer.ArrayBasedPrimitiveIntegerSet"/>
255254
<Class name="com.linkedin.davinci.ingestion.IsolatedIngestionBackend"/>
256255
</Or>
257256
</Match>

Diff for: internal/venice-client-common/src/main/java/com/linkedin/venice/serializer/ArrayBasedPrimitiveIntegerSet.java

-206
This file was deleted.

Diff for: internal/venice-common/build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dependencies {
2727

2828
implementation libraries.avroUtilCompatHelper
2929
implementation libraries.bouncyCastle
30+
implementation libraries.caffeine
3031
implementation libraries.classgraph
3132
implementation libraries.commonsCodec
3233
implementation libraries.commonsIo // IntelliJ gets confused when running tests unless we explicitly depend on a recent version of commons-io

Diff for: internal/venice-common/src/main/java/com/linkedin/venice/kafka/validation/Segment.java

+36-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import static com.linkedin.venice.kafka.validation.SegmentStatus.END_OF_FINAL_SEGMENT;
44
import static com.linkedin.venice.kafka.validation.SegmentStatus.NOT_STARTED;
55

6+
import com.github.benmanes.caffeine.cache.Cache;
7+
import com.github.benmanes.caffeine.cache.Caffeine;
68
import com.linkedin.venice.annotation.NotThreadsafe;
79
import com.linkedin.venice.exceptions.validation.UnsupportedMessageTypeException;
810
import com.linkedin.venice.kafka.protocol.ControlMessage;
@@ -16,6 +18,7 @@
1618
import com.linkedin.venice.kafka.validation.checksum.CheckSumType;
1719
import com.linkedin.venice.message.KafkaKey;
1820
import com.linkedin.venice.utils.CollectionUtils;
21+
import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap;
1922
import java.nio.ByteBuffer;
2023
import java.util.Collections;
2124
import java.util.Map;
@@ -39,6 +42,16 @@
3942
*/
4043
@NotThreadsafe
4144
public class Segment {
45+
/**
46+
* This cache is to reduce the size on heap of debug info, which is very repetitive in nature. Using this cache, each
47+
* unique CharSequence should exist only once on the heap, with many segments referring to it.
48+
*
49+
* We use weak values so that if there are no more segments referencing a given entry, it will also be cleared from
50+
* the cache, and thus avoid a mem leak.
51+
*/
52+
private static final Cache<CharSequence, CharSequence> DEDUPED_DEBUG_INFO =
53+
Caffeine.newBuilder().weakValues().build();
54+
4255
// Immutable state
4356
private final int partition;
4457
private final int segmentNumber;
@@ -78,7 +91,7 @@ public Segment(
7891
this.ended = false;
7992
this.finalSegment = false;
8093
this.newSegment = true;
81-
this.debugInfo = debugInfo;
94+
this.debugInfo = getDedupedDebugInfo(debugInfo);
8295
this.aggregates = aggregates;
8396
}
8497

@@ -101,7 +114,7 @@ public Segment(int partition, ProducerPartitionState state) {
101114
this.ended = segmentStatus.isTerminal();
102115
this.finalSegment = segmentStatus == END_OF_FINAL_SEGMENT;
103116
this.newSegment = false;
104-
this.debugInfo = CollectionUtils.substituteEmptyMap(state.getDebugInfo());
117+
this.debugInfo = getDedupedDebugInfo(state.getDebugInfo());
105118
this.aggregates = CollectionUtils.substituteEmptyMap(state.getAggregates());
106119
this.registered = state.isRegistered;
107120
this.lastRecordProducerTimestamp = state.messageTimestamp;
@@ -117,6 +130,10 @@ public Segment(Segment segment) {
117130
this.ended = segment.ended;
118131
this.finalSegment = segment.finalSegment;
119132
this.newSegment = false;
133+
/**
134+
* N.B. No need to call {@link #getDedupedDebugInfo(Map)} here since we assume the other {@link Segment} instance we
135+
* are copying from was already deduped, having come from one of the other constructors.
136+
*/
120137
this.debugInfo = segment.debugInfo;
121138
this.aggregates = segment.aggregates;
122139
this.registered = segment.registered;
@@ -373,4 +390,21 @@ public SegmentStatus getStatus() {
373390
return SegmentStatus.IN_PROGRESS;
374391
}
375392
}
393+
394+
private Map<CharSequence, CharSequence> getDedupedDebugInfo(Map<CharSequence, CharSequence> original) {
395+
if (original == null || original.isEmpty()) {
396+
return Collections.emptyMap();
397+
}
398+
/**
399+
* The {@link Object2ObjectArrayMap} has an O(N) performance on lookups, but we don't care about the performance
400+
* of the debug info map, so it is fine. The main concern is to make it as compact as possible, which this
401+
* implementation achieves by minimizing per-element overhead (e.g. there is no {@link HashMap.Node} wrapping each
402+
* entry).
403+
*/
404+
Map<CharSequence, CharSequence> deduped = new Object2ObjectArrayMap<>(original.size());
405+
for (Map.Entry<CharSequence, CharSequence> entry: original.entrySet()) {
406+
deduped.put(DEDUPED_DEBUG_INFO.get(entry.getKey(), k -> k), DEDUPED_DEBUG_INFO.get(entry.getValue(), k -> k));
407+
}
408+
return deduped;
409+
}
376410
}

0 commit comments

Comments
 (0)