Skip to content

Commit 21c0b2d

Browse files
authored
Fix memory leak in Exception Replay (#7885)
The weak map stateByThrowable keep the throwable as the key but this exception is also strong referenced by snapshots stored inside the ThrowableState in CapturedThrowable and in locals and extensions for @exception. Fixing by storing weak reference inside the CapturedThrowable and clearing the other ref for @exception at commit time
1 parent 768ce77 commit 21c0b2d

File tree

5 files changed

+39
-8
lines changed

5 files changed

+39
-8
lines changed

dd-java-agent/agent-debugger/debugger-bootstrap/src/main/java/datadog/trace/bootstrap/debugger/CapturedContext.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import datadog.trace.bootstrap.debugger.util.Redaction;
1010
import datadog.trace.bootstrap.debugger.util.TimeoutChecker;
1111
import datadog.trace.bootstrap.debugger.util.WellKnownClasses;
12+
import java.lang.ref.WeakReference;
1213
import java.util.ArrayList;
1314
import java.util.Collections;
1415
import java.util.HashMap;
@@ -183,6 +184,10 @@ public ValueReferenceResolver withExtensions(Map<String, Object> extensions) {
183184
return new CapturedContext(this, extensions);
184185
}
185186

187+
public void removeExtension(String name) {
188+
extensions.remove(name);
189+
}
190+
186191
private void addExtension(String name, Object value) {
187192
extensions.put(name, value);
188193
}
@@ -642,7 +647,7 @@ public TimeoutChecker getTimeoutChecker() {
642647
public static class CapturedThrowable {
643648
private final String type;
644649
private final String message;
645-
private final transient Throwable throwable;
650+
private final transient WeakReference<Throwable> throwable;
646651

647652
/*
648653
* Need to exclude stacktrace from equals/hashCode computation.
@@ -663,7 +668,7 @@ public CapturedThrowable(
663668
this.type = type;
664669
this.message = message;
665670
this.stacktrace = new ArrayList<>(stacktrace);
666-
this.throwable = t;
671+
this.throwable = new WeakReference<>(t);
667672
}
668673

669674
public String getType() {
@@ -679,7 +684,7 @@ public List<CapturedStackFrame> getStacktrace() {
679684
}
680685

681686
public Throwable getThrowable() {
682-
return throwable;
687+
return throwable.get();
683688
}
684689

685690
private static List<CapturedStackFrame> captureFrames(StackTraceElement[] stackTrace) {

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,14 @@ boolean shouldCaptureException(String fingerprint, Clock clock) {
146146

147147
public void addSnapshot(Snapshot snapshot) {
148148
Throwable throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
149+
if (throwable == null) {
150+
LOGGER.debug("Snapshot has no throwable: {}", snapshot.getId());
151+
return;
152+
}
149153
throwable = ExceptionHelper.getInnerMostThrowable(throwable);
150154
if (throwable == null) {
151-
LOGGER.debug(
152-
"Unable to find root cause of exception: {}",
153-
snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable().toString());
155+
throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
156+
LOGGER.debug("Unable to find root cause of exception: {}", String.valueOf(throwable));
154157
return;
155158
}
156159
ThrowableState state =
@@ -172,6 +175,10 @@ void updateLastCapture(String fingerprint, Clock clock) {
172175
fingerprints.put(fingerprint, Instant.now(clock));
173176
}
174177

178+
boolean hasExceptionStateTracked() {
179+
return !snapshotsByThrowable.isEmpty();
180+
}
181+
175182
public static class ThrowableState {
176183
private final String exceptionId;
177184
private List<Snapshot> snapshots;

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import datadog.trace.bootstrap.debugger.ProbeId;
1414
import datadog.trace.bootstrap.debugger.ProbeImplementation;
1515
import datadog.trace.bootstrap.debugger.ProbeLocation;
16+
import datadog.trace.bootstrap.debugger.el.ValueReferences;
1617
import java.util.List;
1718
import org.slf4j.Logger;
1819
import org.slf4j.LoggerFactory;
@@ -67,6 +68,10 @@ public void evaluate(
6768
return;
6869
}
6970
Throwable throwable = context.getCapturedThrowable().getThrowable();
71+
if (throwable == null) {
72+
LOGGER.debug("Throwable cleared by GC");
73+
return;
74+
}
7075
Throwable innerMostThrowable = getInnerMostThrowable(throwable);
7176
String fingerprint =
7277
Fingerprinter.fingerprint(innerMostThrowable, exceptionProbeManager.getClassNameFilter());
@@ -107,6 +112,9 @@ public void commit(
107112
* - DebuggerContext.commit()
108113
*/
109114
snapshot.recordStackTrace(4);
115+
// clear any strong ref to the exception before adding the snapshot to avoid leaking snapshots
116+
// inside the stateByThrowable map
117+
clearExceptionRefs(snapshot);
110118
// add snapshot for later to wait for triggering point (ExceptionDebugger::handleException)
111119
exceptionProbeManager.addSnapshot(snapshot);
112120
LOGGER.debug(
@@ -117,6 +125,11 @@ public void commit(
117125
}
118126
}
119127

128+
private void clearExceptionRefs(Snapshot snapshot) {
129+
snapshot.getCaptures().getReturn().getLocals().remove(ValueReferences.EXCEPTION_REF);
130+
snapshot.getCaptures().getReturn().removeExtension(ValueReferences.EXCEPTION_EXTENSION_NAME);
131+
}
132+
120133
@Override
121134
public void buildLocation(InstrumentationResult result) {
122135
String type = where.getTypeName();

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/DefaultExceptionDebuggerTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,14 @@ public void nestedException() {
143143
expectedFrameIndex,
144144
"com.datadog.debugger.exception.DefaultExceptionDebuggerTest",
145145
"createTest1Exception");
146+
// make sure we are not leaking references
147+
exception = null; // release strong reference
148+
System.gc();
149+
// calling ExceptionProbeManager#hasExceptionStateTracked() will call WeakIdentityHashMap#size()
150+
// through isEmpty() an will purge stale entries
151+
assertWithTimeout(
152+
() -> !exceptionDebugger.getExceptionProbeManager().hasExceptionStateTracked(),
153+
Duration.ofSeconds(30));
146154
}
147155

148156
@Test

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ public void instrumentAndCaptureSnapshots() throws Exception {
131131
Snapshot snapshot0 = listener.snapshots.get(0);
132132
assertProbeId(probeIdsByMethodName, "processWithException", snapshot0.getProbe().getId());
133133
assertEquals("oops", snapshot0.getCaptures().getReturn().getCapturedThrowable().getMessage());
134-
assertTrue(snapshot0.getCaptures().getReturn().getLocals().containsKey("@exception"));
135134
ProbeLocation location = snapshot0.getProbe().getLocation();
136135
assertEquals(
137136
location.getType() + "." + location.getMethod(), snapshot0.getStack().get(0).getFunction());
@@ -212,7 +211,6 @@ public void recursive() throws Exception {
212211
assertProbeId(probeIdsByMethodName, "fiboException", snapshot0.getProbe().getId());
213212
assertEquals(
214213
"oops fibo", snapshot0.getCaptures().getReturn().getCapturedThrowable().getMessage());
215-
assertTrue(snapshot0.getCaptures().getReturn().getLocals().containsKey("@exception"));
216214
assertEquals("1", getValue(snapshot0.getCaptures().getReturn().getArguments().get("n")));
217215
Snapshot snapshot1 = listener.snapshots.get(1);
218216
assertEquals("2", getValue(snapshot1.getCaptures().getReturn().getArguments().get("n")));

0 commit comments

Comments
 (0)