
Commit 0128298

[PROF-7307] Wire up allocation sampling into CpuAndWallTimeWorker
**What does this PR do?**

In #2657 we added support for `Collectors::ThreadContext` to sample a given object allocation, but until now these samples were only triggered from our test suite. This PR adds the missing part: actually sampling objects as Ruby allocates them.

The `Collectors::CpuAndWallTimeWorker` already had a `TracePoint` that is called on every new object allocation (`RUBY_INTERNAL_EVENT_NEWOBJ`), but we were only using it to count allocations. This PR changes the `TracePoint` code to also call into `Collectors::ThreadContext`, thus allowing allocation sampling to happen.

**Motivation:**

This PR is another step towards the allocation profiling feature.

**Additional Notes:**

It's not yet possible to turn on allocation profiling via configuration; I am reserving that change for later. But it's easy to do by hand: change `profiling/component.rb` to use `alloc_samples_enabled: true` and `allocation_sample_every: 1`. This will have a big overhead impact on the application, which I'll be working on in later PRs. In particular, the sampling approach we're using (`allocation_sample_every`) is only a placeholder; it will get replaced before we get to beta. That's why I don't want to add it as a real user-facing setting -- it's temporary.

**How to test the change?**

This change includes test coverage. You can also check it manually by following the instructions in the "Additional Notes" above. (A feature flag is needed for this data to show up in the Datadog UX.)
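
To make the steps in the "Additional Notes" concrete, here's a minimal sketch of that local edit, assuming the `component.rb` wiring added in this PR and assuming `alloc_samples_enabled` is the corresponding recorder-side flag (the recorder change is not part of this diff):

```ruby
# Hypothetical local edit, for testing only -- not a supported configuration.
# In lib/datadog/profiling/component.rb, where the CpuAndWallTimeWorker is built:
Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
  # ... other arguments unchanged ...
  allocation_sample_every: 1, # was 0; any value > 0 enables the placeholder sampling
)

# ...and, assuming alloc_samples_enabled is the recorder-side flag mentioned above,
# turn on storage of allocation samples where the recorder is built:
Datadog::Profiling::StackRecorder.new(
  # ... other arguments unchanged ...
  alloc_samples_enabled: true, # was false
)
```
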
1 parent 737db7b commit 0128298

6 files changed, +100 -7 lines changed

ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c (+38 -6)

```diff
@@ -83,6 +83,7 @@ struct cpu_and_wall_time_worker_state {
   bool allocation_counting_enabled;
   bool no_signals_workaround_enabled;
   bool dynamic_sampling_rate_enabled;
+  int allocation_sample_every; // Temporarily used for development/testing of allocation profiling
   VALUE self_instance;
   VALUE thread_context_collector_instance;
   VALUE idle_sampling_helper_instance;
@@ -150,7 +151,8 @@ static VALUE _native_initialize(
   VALUE idle_sampling_helper_instance,
   VALUE allocation_counting_enabled,
   VALUE no_signals_workaround_enabled,
-  VALUE dynamic_sampling_rate_enabled
+  VALUE dynamic_sampling_rate_enabled,
+  VALUE allocation_sample_every
 );
 static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
 static VALUE _native_sampling_loop(VALUE self, VALUE instance);
@@ -183,9 +185,10 @@ static void grab_gvl_and_sample(void);
 static void reset_stats(struct cpu_and_wall_time_worker_state *state);
 static void sleep_for(uint64_t time_ns);
 static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
-static void on_newobj_event(DDTRACE_UNUSED VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
+static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
 static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
 static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
+static VALUE rescued_sample_allocation(VALUE tracepoint_data);

 // Note on sampler global state safety:
 //
@@ -223,7 +226,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
   // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
   rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

-  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 7);
+  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -261,6 +264,7 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_counting_enabled = false;
   state->no_signals_workaround_enabled = false;
   state->dynamic_sampling_rate_enabled = true;
+  state->allocation_sample_every = 0;
   state->thread_context_collector_instance = Qnil;
   state->idle_sampling_helper_instance = Qnil;
   state->owner_thread = Qnil;
@@ -287,12 +291,14 @@ static VALUE _native_initialize(
   VALUE idle_sampling_helper_instance,
   VALUE allocation_counting_enabled,
   VALUE no_signals_workaround_enabled,
-  VALUE dynamic_sampling_rate_enabled
+  VALUE dynamic_sampling_rate_enabled,
+  VALUE allocation_sample_every
 ) {
   ENFORCE_BOOLEAN(gc_profiling_enabled);
   ENFORCE_BOOLEAN(allocation_counting_enabled);
   ENFORCE_BOOLEAN(no_signals_workaround_enabled);
   ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
+  ENFORCE_TYPE(allocation_sample_every, T_FIXNUM);

   struct cpu_and_wall_time_worker_state *state;
   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
@@ -301,6 +307,12 @@ static VALUE _native_initialize(
   state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
   state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
   state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
+  state->allocation_sample_every = NUM2INT(allocation_sample_every);
+
+  if (state->allocation_sample_every < 0) {
+    rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every);
+  }
+
   state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
   state->idle_sampling_helper_instance = idle_sampling_helper_instance;
   state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
@@ -880,7 +892,7 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {

 // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
 // when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
-static void on_newobj_event(DDTRACE_UNUSED VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
+static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
   // Update thread-local allocation count
   if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
     allocation_count = 0;
@@ -907,7 +919,12 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
   // defined as not being able to allocate) sets this.
   state->during_sample = true;

-  // TODO: Sampling goes here (calling into `thread_context_collector_sample_allocation`)
+  // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before
+  // beta), and having something here allows us to test the rest of feature, sampling decision aside.
+  if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) {
+    // Rescue against any exceptions that happen during sampling
+    safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
+  }

   state->during_sample = false;
 }
@@ -929,3 +946,18 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
     return result;
   }
 }
+
+static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
+  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
+
+  // This should not happen in a normal situation because on_newobj_event already checked for this, but just in case...
+  if (state == NULL) return Qnil;
+
+  rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data);
+  VALUE new_object = rb_tracearg_object(data);
+
+  thread_context_collector_sample_allocation(state->thread_context_collector_instance, state->allocation_sample_every, new_object);
+
+  // Return a dummy VALUE because we're called from rb_rescue2 which requires it
+  return Qnil;
+}
```
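
For readers less familiar with the native extension, here's a rough Ruby rendering of the sampling decision wired up above. It's an illustration only: the real logic lives in `on_newobj_event` and `rescued_sample_allocation` in C, exceptions are rescued via `safely_call` (which wraps `rb_rescue2`) rather than Ruby `begin`/`rescue`, and the collector call name below is a placeholder.

```ruby
# Illustration of the placeholder "sample 1 in every N allocations" decision.
# Not the actual implementation; the collector API below is a placeholder.
class AllocationSamplingSketch
  def initialize(thread_context_collector, allocation_sample_every)
    @thread_context_collector = thread_context_collector
    @allocation_sample_every = allocation_sample_every # 0 means "never sample"
    @allocation_count = 0
  end

  # The real code is driven by the RUBY_INTERNAL_EVENT_NEWOBJ tracepoint, so this
  # would be called once per allocated object.
  def on_newobj(new_object)
    @allocation_count += 1

    sample_this_one =
      @allocation_sample_every > 0 && (@allocation_count % @allocation_sample_every) == 0
    return unless sample_this_one

    begin
      # Each recorded sample stands in for allocation_sample_every allocations, which is
      # why the C code passes state->allocation_sample_every along to the collector.
      @thread_context_collector.sample_allocation(new_object, weight: @allocation_sample_every)
    rescue StandardError => e
      # A sampling failure must never propagate into the code that allocated the object.
      warn("Allocation sampling failed: #{e.class}: #{e.message}")
    end
  end
end
```
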

lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb (+10 -1)

```diff
@@ -21,14 +21,22 @@ def initialize(
           idle_sampling_helper: IdleSamplingHelper.new,
           # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the
           # profiler overhead!
-          dynamic_sampling_rate_enabled: true
+          dynamic_sampling_rate_enabled: true,
+          allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead!
         )
           unless dynamic_sampling_rate_enabled
             Datadog.logger.warn(
               'Profiling dynamic sampling rate disabled. This should only be used for testing, and will increase overhead!'
             )
           end

+          if allocation_counting_enabled && allocation_sample_every > 0
+            Datadog.logger.warn(
+              "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \
+              'experimental, not recommended, and will increase overhead!'
+            )
+          end
+
           self.class._native_initialize(
             self,
             thread_context_collector,
@@ -37,6 +45,7 @@ def initialize(
             allocation_counting_enabled,
             no_signals_workaround_enabled,
             dynamic_sampling_rate_enabled,
+            allocation_sample_every,
           )
           @worker_thread = nil
           @failure_exception = nil
```
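
Because the new keyword argument is internal-only for now, the most direct way to exercise it is from a test or console session. A hypothetical sketch follows; the keyword arguments beyond those visible in this diff are assumed, and building a working `thread_context_collector` is elided:

```ruby
# Hypothetical internal-API usage, for local experimentation only.
# Keyword names follow this diff; the full required argument list is assumed.
worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
  gc_profiling_enabled: false,
  allocation_counting_enabled: true,
  no_signals_workaround_enabled: false,
  thread_context_collector: thread_context_collector, # a configured Collectors::ThreadContext
  allocation_sample_every: 50, # placeholder strategy: sample 1 in every 50 allocations
)

worker.start # `start` is part of the collector's surface per the .rbs signatures further down
```
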

lib/datadog/profiling/component.rb (+1)

```diff
@@ -91,6 +91,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
             allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
             no_signals_workaround_enabled: no_signals_workaround_enabled,
             thread_context_collector: thread_context_collector,
+            allocation_sample_every: 0,
           )
         else
           load_pprof_support
```

sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs (+2)

```diff
@@ -14,6 +14,7 @@ module Datadog
           thread_context_collector: Datadog::Profiling::Collectors::ThreadContext,
           ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper,
           ?dynamic_sampling_rate_enabled: bool,
+          ?allocation_sample_every: Integer,
         ) -> void

         def self._native_initialize: (
@@ -24,6 +25,7 @@ module Datadog
           bool allocation_counting_enabled,
           bool no_signals_workaround_enabled,
           bool dynamic_sampling_rate_enabled,
+          ::Integer allocation_sample_every,
         ) -> true

         def start: () -> bool?
```

spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb (+48)

```diff
@@ -412,6 +412,54 @@
         expect(stats.fetch(:trigger_sample_attempts)).to eq(stats.fetch(:postponed_job_success))
       end
     end
+
+    context 'when allocation sampling is enabled' do
+      let(:options) { { allocation_sample_every: 1 } }
+
+      before do
+        allow(Datadog.logger).to receive(:warn)
+      end
+
+      it 'logs a warning message mentioning this is experimental' do
+        expect(Datadog.logger).to receive(:warn).with(/Enabled experimental allocation profiling/)
+
+        start
+      end
+
+      it 'records allocated objects' do
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+        allocation_line = __LINE__ - 1
+
+        cpu_and_wall_time_worker.stop
+
+        allocation_sample =
+          samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
+            .find { |s| s.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' }
+
+        expect(allocation_sample.values).to include(:'alloc-samples' => 123)
+        expect(allocation_sample.locations.first.lineno).to eq allocation_line
+      end
+    end
+
+    context 'when allocation sampling is disabled' do
+      let(:options) { { allocation_sample_every: 0 } }
+
+      it 'does not record allocations' do
+        stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
+
+        start
+
+        123.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
+
+        cpu_and_wall_time_worker.stop
+
+        expect(samples_from_pprof(recorder.serialize!).map(&:values)).to all(include(:'alloc-samples' => 0))
+      end
+    end
   end

   describe 'Ractor safety' do
```

spec/datadog/profiling/component_spec.rb (+1)

```diff
@@ -167,6 +167,7 @@
           allocation_counting_enabled: anything,
           no_signals_workaround_enabled: :no_signals_result,
           thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
+          allocation_sample_every: 0,
         )

         build_profiler_component
```
