Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions ddtrace/profiling/collector/_memalloc_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ CodeFunctionCache::occupancy_histogram() const
return hist;
}

std::optional<Datadog::function_id>
CodeFunctionCache::lookup(PyCodeObject* code, PyObject* name, PyObject* filename, int firstlineno)
CacheHit
CodeFunctionCache::lookup(PyCodeObject* code, PyObject* name, PyObject* filename, int firstlineno, int lasti) noexcept
{
Set& s = sets_[set_index(code)];
for (size_t i = 0; i < WAYS_PER_SET; ++i) {
Expand All @@ -74,22 +74,26 @@ CodeFunctionCache::lookup(PyCodeObject* code, PyObject* name, PyObject* filename
* re-interns and overwrites it via insert(). */
if (s.names[i] == name && s.filenames[i] == filename && s.firstlines[i] == firstlineno) {
++hits_;
return s.functions[i];
/* line >= 0 only when the stored lasti matches, letting the
* caller skip parse_linetable; otherwise -1 signals a reparse. */
return CacheHit{ s.functions[i], s.lastis[i] == lasti ? s.lines[i] : -1 };
}
++misses_;
return std::nullopt;
return CacheHit{ nullptr, -1 };
}
}
++misses_;
return std::nullopt;
return CacheHit{ nullptr, -1 };
}

void
CodeFunctionCache::insert(PyCodeObject* code,
Datadog::function_id id,
PyObject* name,
PyObject* filename,
int firstlineno)
int firstlineno,
int lasti,
int line)
{
Set& s = sets_[set_index(code)];

Expand All @@ -100,6 +104,8 @@ CodeFunctionCache::insert(PyCodeObject* code,
s.names[i] = name;
s.filenames[i] = filename;
s.firstlines[i] = firstlineno;
s.lastis[i] = lasti;
s.lines[i] = line;
return;
}
}
Expand All @@ -113,6 +119,8 @@ CodeFunctionCache::insert(PyCodeObject* code,
s.names[way] = name;
s.filenames[way] = filename;
s.firstlines[way] = firstlineno;
s.lastis[way] = lasti;
s.lines[way] = line;
}

void
Expand Down
42 changes: 36 additions & 6 deletions ddtrace/profiling/collector/_memalloc_code_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,25 @@

namespace Datadog {

/* Result of a cache lookup.
* func_id == nullptr: cache miss — caller must do a full miss (intern strings, insert).
* line == -1: lasti didn't match the stored value — caller must resolve the line for this frame.
* line >= 0: lasti matched; cached line number is valid.
*
* CacheHit is intentionally kept small to make lookup() cheap to return by value.
* On x86-64 System V, aggregates up to 16B are typically returned in registers; other ABIs may differ. */
struct CacheHit
{
Datadog::function_id func_id; // nullptr = miss
int line; // -1 = lasti mismatch; >=0 = cached line
};

/* Keep CacheHit small; increasing its size can force some ABIs to use a hidden
* return pointer (sret), adding per-lookup overhead on the allocator-hook hot path. */
static_assert(sizeof(CacheHit) <= 16,
"CacheHit exceeds 16B — consider measuring lookup() "
"return overhead on supported ABIs before adding fields");

/* CodeFunctionCache caches libdatadog function_id values keyed by
* PyCodeObject*. Frame walks during heap-profiler sample construction
* call ProfilesDictionary::insert_str twice and insert_function once
Expand Down Expand Up @@ -53,14 +72,23 @@ class CodeFunctionCache
* capacity = num_sets * WAYS_PER_SET. Clamped to [MIN, MAX]. */
explicit CodeFunctionCache(size_t capacity_hint = DEFAULT_CAPACITY);

/* Returns the cached function_id for `code` only if present AND its stored
* identity still matches the supplied (name, filename, firstlineno),
* guarding against PyCodeObject address reuse. Otherwise a miss. */
std::optional<Datadog::function_id> lookup(PyCodeObject* code, PyObject* name, PyObject* filename, int firstlineno);
/* Returns a CacheHit for `code` only if present AND its stored identity
* still matches the supplied (name, filename, firstlineno), guarding
* against PyCodeObject address reuse. Check hit.func_id != nullptr for a
* hit; hit.line >= 0 means lasti matched the stored value so the caller
* can skip parse_linetable. */
CacheHit lookup(PyCodeObject* code, PyObject* name, PyObject* filename, int firstlineno, int lasti) noexcept;

/* Inserts (code, id) together with the identity used to validate future
* lookups. If the target set is full, evicts via FIFO. */
void insert(PyCodeObject* code, Datadog::function_id id, PyObject* name, PyObject* filename, int firstlineno);
* lookups plus the (lasti, line) pair. If the target set is full, evicts
* via FIFO. */
void insert(PyCodeObject* code,
Datadog::function_id id,
PyObject* name,
PyObject* filename,
int firstlineno,
int lasti,
int line);

/* Drops every entry. Counters are preserved (use reset_counters to
* zero them). */
Expand Down Expand Up @@ -93,6 +121,8 @@ class CodeFunctionCache
PyObject* names[WAYS_PER_SET] = { nullptr, nullptr };
PyObject* filenames[WAYS_PER_SET] = { nullptr, nullptr };
int firstlines[WAYS_PER_SET] = { 0, 0 };
int lastis[WAYS_PER_SET] = { -1, -1 }; // -1 = no lasti cached
int lines[WAYS_PER_SET] = { 0, 0 };
/* FIFO eviction: next way to overwrite, alternates 0/1. */
uint8_t next_evict = 0;
};
Expand Down
41 changes: 31 additions & 10 deletions ddtrace/profiling/collector/_memalloc_tb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@
#include "_memalloc_code_cache.h"
#include "_memalloc_tb.h"

/* Parse the line number for (code, lasti) without calling PyCode_Addr2Line.
* Equivalent to memalloc_get_lineno() in _memalloc_frame.h but takes lasti
* directly so callers that already computed it don't do it twice. Keep these
* implementations in sync. */
static inline int
resolve_lineno(PyCodeObject* code, int lasti)
{
#ifdef _PY310_AND_LATER
const unsigned char* table = (const unsigned char*)PyBytes_AS_STRING(code->co_linetable);
Py_ssize_t len = PyBytes_GET_SIZE(code->co_linetable);
#else
const unsigned char* table = (const unsigned char*)PyBytes_AS_STRING(code->co_lnotab);
Py_ssize_t len = PyBytes_GET_SIZE(code->co_lnotab);
#endif
return DataDog::parse_linetable(table, len, lasti, code->co_firstlineno);
}

/* Extract a UTF-8 string_view from a Python unicode object without any
* CPython API calls that could allocate, free, or touch error state.
*
Expand Down Expand Up @@ -101,30 +118,34 @@ push_stacktrace_to_sample_no_refcount(Datadog::Sample& sample, uint16_t max_nfra
if (code == NULL) {
continue;
}
int line = memalloc_get_lineno(frame, code);

/* Identity used to validate cache hits against PyCodeObject address
* reuse. Cheap borrowed-reference / field reads only (no allocation). */
PyObject* code_name = DataDog::get_code_name(code);
PyObject* code_filename = code->co_filename;
int code_firstlineno = code->co_firstlineno;
int lasti = DataDog::get_lasti(frame, code);

/* Cache lookup short-circuits the three libdd calls (insert_str x2 +
* insert_function) Sample::push_frame would otherwise make. */
* insert_function) Sample::push_frame would otherwise make. A hit with
* hit.line >= 0 also lets us skip parse_linetable for this frame. */
Datadog::CodeFunctionCache* cache = Datadog::CodeFunctionCache::instance;
if (cache != nullptr) {
std::optional<Datadog::function_id> cached =
cache->lookup(code, code_name, code_filename, code_firstlineno);
if (cached) {
sample.push_frame(*cached, 0, line);
Datadog::CacheHit hit = cache->lookup(code, code_name, code_filename, code_firstlineno, lasti);
if (hit.func_id != nullptr) {
/* Reuse the cached line when lasti matched (line >= 0); otherwise
* parse the line table once for this (now mismatched) lasti. */
int line = hit.line >= 0 ? hit.line : resolve_lineno(code, lasti);
sample.push_frame(hit.func_id, 0, line);
++pushed_frames;
continue;
}
}

/* Cache miss: intern explicitly so we can cache the resulting
* function_id. Drops the frame on intern failure to match the
* silent-skip behavior of Sample::push_frame_impl. */
/* Cache miss: resolve the line, then intern explicitly so we can cache
* the resulting function_id. Drops the frame on intern failure to match
* the silent-skip behavior of Sample::push_frame_impl. */
int line = resolve_lineno(code, lasti);
std::string_view name_sv = unicode_to_sv_no_alloc(code_name);
std::string_view filename_sv = unicode_to_sv_no_alloc(code_filename);

Expand All @@ -139,7 +160,7 @@ push_stacktrace_to_sample_no_refcount(Datadog::Sample& sample, uint16_t max_nfra
}

if (cache != nullptr) {
cache->insert(code, *func_id, code_name, code_filename, code_firstlineno);
cache->insert(code, *func_id, code_name, code_filename, code_firstlineno, lasti, line);
}
sample.push_frame(*func_id, 0, line);
++pushed_frames;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
other:
- |
profiling: Adds a per-way ``lasti``-to-line cache in the heap profiler's
``CodeFunctionCache``. Repeated allocation sites that hit the cache now
reuse the previously resolved line number, skipping ``parse_linetable``
on the frame-walk hot path during heap sample construction.
Loading