From aa1fbaa8cfa27558ed73a396a1fdd7b349ea02a2 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Tue, 28 Jan 2025 14:27:13 -0500 Subject: [PATCH] fix(profiling): fix SystemError when collecting memory profiler events [backport 2.19] (#12125) Backports #12075 to 2.19 We added locking to the memory profiler to address crashes. These locks are mostly "try" locks, meaning we bail out if we can't acquire them right away. This was done defensively to mitigate the possibility of deadlock until we fully understood why the locks are needed and could guarantee their correctness. But as a result of using try locks, the `iter_events` function in particular can fail if the memory profiler lock is contended when it tries to collect profiling events. The function then returns NULL, leading to SystemError exceptions because we don't set an error. Even if we set an error, returning NULL isn't the right thing to do. It'll basically mean we wait until the next profile iteration, still accumulating events in the same buffer, and try again to upload the events. So we're going to get multiple iteration's worth of events. The right thing to do is take the lock unconditionally in `iter_events`. We can allocate the new tracker outside the memory allocation profiler lock so that we don't need to worry about reentrancy/deadlock issues if we start profiling that allocation. Then, the only thing we do under the lock is swap out the global tracker, so it's safe to take the lock unconditionally. Fixes #11831 TODO - regression test? --- ddtrace/profiling/collector/_memalloc.c | 22 +++++++++++++------ ...loc-iter-events-null-780fd50bbebbf616.yaml | 4 ++++ 2 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml diff --git a/ddtrace/profiling/collector/_memalloc.c b/ddtrace/profiling/collector/_memalloc.c index f3de61a7b2c..4192af517b6 100644 --- a/ddtrace/profiling/collector/_memalloc.c +++ b/ddtrace/profiling/collector/_memalloc.c @@ -380,18 +380,26 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE } IterEventsState* iestate = (IterEventsState*)type->tp_alloc(type, 0); - if (!iestate) + if (!iestate) { + PyErr_SetString(PyExc_RuntimeError, "failed to allocate IterEventsState"); return NULL; + } - /* reset the current traceback list */ - if (memlock_trylock(&g_memalloc_lock)) { - iestate->alloc_tracker = global_alloc_tracker; - global_alloc_tracker = alloc_tracker_new(); - memlock_unlock(&g_memalloc_lock); - } else { + /* Reset the current traceback list. Do this outside lock so we can track it, + * and avoid reentrancy/deadlock problems, if we start tracking the raw + * allocator domain */ + alloc_tracker_t* tracker = alloc_tracker_new(); + if (!tracker) { + PyErr_SetString(PyExc_RuntimeError, "failed to allocate new allocation tracker"); Py_TYPE(iestate)->tp_free(iestate); return NULL; } + + memlock_lock(&g_memalloc_lock); + iestate->alloc_tracker = global_alloc_tracker; + global_alloc_tracker = tracker; + memlock_unlock(&g_memalloc_lock); + iestate->seq_index = 0; PyObject* iter_and_count = PyTuple_New(3); diff --git a/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml b/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml new file mode 100644 index 00000000000..52a43cbd2a1 --- /dev/null +++ b/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + profiling: fix SystemError from the memory profiler returning NULL when collecting events