Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions tests/profiling/collector/test_greenlet_buffer_reuse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Regression guard: unwind_greenlets reuses its per-thread StackInfo buffer.

The buffer-reuse fix keeps the `current_greenlets` vector (and each entry's
FrameStack) alive between samples, so it only allocates new StackInfo objects
when a sample exceeds the prior peak greenlet count. Before the fix, every
sample allocated a fresh StackInfo (with a std::deque<Frame>) per tracked
greenlet -- a large source of native heap churn under gevent.

RSS/arena size cannot distinguish the two implementations (on the single
sampling thread, freed blocks are reused next sample, so the footprint is
identical). What differs is the *number of allocations*. The native module
exposes a cumulative counter, ``stack._stack._greenlet_buffer_alloc_count()``, that is
incremented only on the buffer-growth path. With reuse it plateaus once the
working set is sampled; a per-sample-allocation regression makes it grow
without bound.

This test asserts the counter stops growing after warmup. It fails on builds
without the fix (the counter symbol does not exist there).
"""

import os
import sys
import sysconfig

import pytest


# sys.version_info[3] is the release level ("alpha", "beta", "candidate" or
# "final"); it never identifies a free-threaded interpreter. The build-time
# Py_GIL_DISABLED flag does (it is unset/0 on regular builds and on versions
# that predate free threading).
_FREE_THREADED_BUILD = bool(sysconfig.get_config_var("Py_GIL_DISABLED"))

# Truthy only when gevent tests are opted into through DD_PROFILE_TEST_GEVENT
# and the interpreter is older than 3.13, or is a 3.13 build with the GIL.
GEVENT_COMPATIBLE_WITH_PYTHON_VERSION = os.getenv("DD_PROFILE_TEST_GEVENT", False) and (
    sys.version_info[:2] < (3, 13) or (sys.version_info[:2] == (3, 13) and not _FREE_THREADED_BUILD)
)


@pytest.mark.skipif(
    not GEVENT_COMPATIBLE_WITH_PYTHON_VERSION,
    reason="gevent not compatible / DD_PROFILE_TEST_GEVENT not set",
)
@pytest.mark.subprocess(
    env={"DD_PROFILING_OUTPUT_PPROF": "/tmp/test_greenlet_buffer_reuse"},
    out=None,
    err=None,
)
def test_greenlet_unwind_buffer_reuse() -> None:
    """Check that the greenlet buffer allocation counter plateaus after warmup.

    Spawns N_IDLE greenlets parked at a fixed recursion depth, samples them at
    a 5ms interval, and reads ``_greenlet_buffer_alloc_count()`` once after the
    warmup window and once after the measurement window. The first reading must
    show the buffer was populated; the difference between the two readings must
    stay small.
    """
    # Monkey-patching has to happen before gevent and the profiler are imported.
    from gevent import monkey

    monkey.patch_all()

    import gevent

    from ddtrace.internal.datadog.profiling import stack
    from ddtrace.profiling import profiler

    N_IDLE = 500
    STACK_DEPTH = 30
    WARMUP_S = 3.0
    MEASURE_S = 5.0

    def _park_at_depth(remaining: int) -> None:
        # Recurse down to the requested depth, then block in a long sleep so
        # the greenlet stays parked with a deep stack.
        if remaining <= 0:
            gevent.sleep(1000)
            return
        _park_at_depth(remaining - 1)

    def parked_greenlet() -> None:
        _park_at_depth(STACK_DEPTH)

    prof = profiler.Profiler()
    prof.start()
    # 5ms (minimum) for aggressive sampling, with adaptive sampling disabled.
    stack.set_interval(0.005)
    stack.set_adaptive_sampling(False)
    try:
        parked = []
        for _ in range(N_IDLE):
            parked.append(gevent.spawn(parked_greenlet))
        # Give the spawned greenlets a moment to register.
        gevent.sleep(0.2)

        # Warmup window: the first counter reading is taken once it elapses.
        gevent.sleep(WARMUP_S)
        c1 = stack._stack._greenlet_buffer_alloc_count()

        # Measurement window: keep sampling, then read the counter again.
        gevent.sleep(MEASURE_S)
        c2 = stack._stack._greenlet_buffer_alloc_count()

        gevent.killall(parked, timeout=5)
    finally:
        prof.stop()

    # Sanity check first: if the counter barely moved during warmup, the
    # greenlets were probably never sampled and the guard below proves nothing.
    assert c1 >= N_IDLE // 2, (
        f"greenlet reuse buffer was barely populated (c1={c1}, N_IDLE={N_IDLE}); "
        f"greenlets may not have been sampled, so this guard would be vacuous."
    )

    # The guard itself: post-warmup growth must stay within a tenth of N_IDLE,
    # which leaves slack for incidental greenlet churn / parent-chain entries.
    growth = c2 - c1
    assert growth <= N_IDLE // 10, (
        f"unwind_greenlets is allocating StackInfo per sample instead of reusing "
        f"its buffer: it grew by {growth} over {MEASURE_S:.0f}s after warmup "
        f"(c1={c1}, c2={c2}). Per-sample greenlet buffers are no longer reused."
    )
87 changes: 87 additions & 0 deletions tests/profiling/collector/test_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,93 @@ def measure(n_idle: int) -> float:
)


@pytest.mark.skipif(
    not GEVENT_COMPATIBLE_WITH_PYTHON_VERSION,
    reason=f"gevent is not compatible with Python {'.'.join(map(str, tuple(sys.version_info)[:3]))}",
)
@pytest.mark.subprocess(
    env={
        "DD_PROFILING_OUTPUT_PPROF": "/tmp/test_gevent_unwind_greenlets_rss_stable",
    },
    out=None,
    err=None,
)
def test_gevent_unwind_greenlets_rss_stable() -> None:
    """Check that RSS stays bounded under sustained unwind_greenlets sampling.

    N_IDLE greenlets are parked at a fixed recursion depth and sampled at a
    5ms interval. RSS is recorded once after a warmup window and then once per
    SAMPLE_PERIOD_S during the measurement window; the peak of those readings
    may exceed the post-warmup baseline by less than MAX_GROWTH_MB.
    """
    # Monkey-patching has to happen before gevent and the profiler are imported.
    from gevent import monkey

    monkey.patch_all()

    import time

    import gevent

    from ddtrace.internal.datadog.profiling import stack
    from ddtrace.profiling import profiler
    from ddtrace.vendor import psutil

    N_IDLE = 1000
    STACK_DEPTH = 30
    WARMUP_S = 3.0
    MEASURE_S = 10.0
    SAMPLE_PERIOD_S = 1.0
    MAX_GROWTH_MB = 20  # generous; regressions typically show hundreds of MB

    def _park_at_depth(remaining: int) -> None:
        # Recurse down to the requested depth, then block in a long sleep so
        # the greenlet stays parked with a deep stack.
        if remaining <= 0:
            gevent.sleep(1000)
            return
        _park_at_depth(remaining - 1)

    def parked_greenlet() -> None:
        _park_at_depth(STACK_DEPTH)

    prof = profiler.Profiler()
    prof.start()
    # 5ms (minimum allowed) for aggressive sampling, adaptive sampling disabled.
    stack.set_interval(0.005)
    stack.set_adaptive_sampling(False)
    try:
        parked = []
        for _ in range(N_IDLE):
            parked.append(gevent.spawn(parked_greenlet))
        # Give the spawned greenlets a moment to register.
        gevent.sleep(0.1)

        proc = psutil.Process()

        # Warmup window: yield to the hub in SAMPLE_PERIOD_S steps.
        deadline = time.monotonic() + WARMUP_S
        while time.monotonic() < deadline:
            gevent.sleep(SAMPLE_PERIOD_S)

        # NOTE(review): the baseline is taken only after warmup. A reviewer
        # pointed out that if the allocator has already grown/cached enough
        # memory by then, per-sample allocation churn can stay hidden below
        # MAX_GROWTH_MB, so this check catches RSS growth, not churn itself.
        rss_after_warmup = proc.memory_info().rss

        # Measurement window: one RSS reading per SAMPLE_PERIOD_S.
        samples = []
        deadline = time.monotonic() + MEASURE_S
        while time.monotonic() < deadline:
            gevent.sleep(SAMPLE_PERIOD_S)
            samples.append(proc.memory_info().rss)

        gevent.killall(parked, timeout=5)
    finally:
        prof.stop()

    peak_rss = max(samples)
    growth_mb = (peak_rss - rss_after_warmup) / (1024 * 1024)

    assert growth_mb < MAX_GROWTH_MB, (
        f"RSS grew {growth_mb:.1f} MB during {MEASURE_S:.0f}s of sustained "
        f"unwind_greenlets sampling (after {WARMUP_S:.0f}s warmup, "
        f"baseline {rss_after_warmup / (1024 * 1024):.1f} MB, peak "
        f"{peak_rss / (1024 * 1024):.1f} MB). Per-sample stack-collector "
        f"buffers may no longer be reused."
    )


def test_greenlet_string_table_cleanup_after_ephemeral_clear(tmp_path: Path) -> None:
"""Verify that tracking/untracking greenlets across 25+ uploads (which triggers
ephemeral string table clearing) does not crash or lose greenlet names.
Expand Down
Loading