Skip to content

Commit

Permalink
DEBUG-3317 report probe status errored if probe fails to instrument (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
p-datadog authored Jan 27, 2025
1 parent 494e0e1 commit 6cdf92e
Show file tree
Hide file tree
Showing 13 changed files with 260 additions and 45 deletions.
2 changes: 2 additions & 0 deletions lib/datadog/di/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def initialize(settings, agent_settings, logger, code_tracker: nil, telemetry: n
@agent_settings = agent_settings
@logger = logger
@telemetry = telemetry
@code_tracker = code_tracker
@redactor = Redactor.new(settings)
@serializer = Serializer.new(settings, redactor, telemetry: telemetry)
@instrumenter = Instrumenter.new(settings, serializer, logger, code_tracker: code_tracker, telemetry: telemetry)
Expand All @@ -90,6 +91,7 @@ def initialize(settings, agent_settings, logger, code_tracker: nil, telemetry: n
attr_reader :agent_settings
attr_reader :logger
attr_reader :telemetry
attr_reader :code_tracker
attr_reader :instrumenter
attr_reader :transport
attr_reader :probe_notifier_worker
Expand Down
6 changes: 6 additions & 0 deletions lib/datadog/di/probe_notification_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def build_emitting(probe)
status: 'EMITTING',)
end

def build_errored(probe, exc)
build_status(probe,
message: "Instrumentation for probe #{probe.id} failed: #{exc}",
status: 'ERROR',)
end

# Duration is in seconds.
def build_executed(probe,
trace_point: nil, rv: nil, duration: nil, caller_locations: nil,
Expand Down
31 changes: 26 additions & 5 deletions lib/datadog/di/remote.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,48 @@ def receivers(telemetry)
begin
probe_spec = parse_content(content)
probe = ProbeBuilder.build_from_remote_config(probe_spec)
payload = component.probe_notification_builder.build_received(probe)
component.probe_notifier_worker.add_status(payload)
probe_notification_builder = component.probe_notification_builder
payload = probe_notification_builder.build_received(probe)
probe_notifier_worker = component.probe_notifier_worker
probe_notifier_worker.add_status(payload)
component.logger.debug { "di: received probe from RC: #{probe.type} #{probe.location}" }

begin
# TODO test exception capture
probe_manager.add_probe(probe)
content.applied
rescue DI::Error::DITargetNotInRegistry => exc
component.telemetry&.report(exc, description: "Line probe is targeting a loaded file that is not in code tracker")

payload = probe_notification_builder.build_errored(probe, exc)
probe_notifier_worker.add_status(payload)

# If a probe fails to install, we will mark the content
# as errored. On subsequent remote configuration application
# attemps, probe manager will raise the "previously errored"
# exception and we'll rescue it here, again marking the
# content as errored but with a somewhat different exception
# message.
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
rescue => exc
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions

component.logger.debug { "di: unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception adding probe in DI remote receiver")

# TODO test this path
payload = probe_notification_builder.build_errored(probe, exc)
probe_notifier_worker.add_status(payload)

# If a probe fails to install, we will mark the content
# as errored. On subsequent remote configuration application
# attemps, probe manager will raise the "previously errored"
# exception and we'll rescue it here, again marking the
# content as errored but with a somewhat different exception
# message.
# TODO stack trace must be redacted or not sent at all
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}")
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
end

# Important: even if processing fails for this probe config,
Expand All @@ -84,7 +104,8 @@ def receivers(telemetry)
component.logger.debug { "di: unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception handling probe in DI remote receiver")

content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}")
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
end
end
end
Expand Down
26 changes: 15 additions & 11 deletions sig/datadog/di/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@ module Datadog

@telemetry: untyped

@redactor: untyped
@redactor: Redactor

@serializer: untyped
@serializer: Serializer

@instrumenter: untyped
@instrumenter: Instrumenter

@code_tracker: CodeTracker

@transport: untyped

@probe_notifier_worker: untyped
@probe_notifier_worker: ProbeNotifierWorker

@probe_notification_builder: untyped
@probe_notification_builder: ProbeNotificationBuilder

@probe_manager: untyped
@probe_manager: ProbeManager

def self.build: (untyped settings, untyped agent_settings, Core::Logger logger, ?telemetry: untyped?) -> (nil | untyped)

Expand All @@ -38,17 +40,19 @@ module Datadog

attr_reader telemetry: untyped

attr_reader instrumenter: untyped
attr_reader code_tracker: CodeTracker

attr_reader instrumenter: Instrumenter

attr_reader transport: untyped

attr_reader probe_notifier_worker: untyped
attr_reader probe_notifier_worker: ProbeNotifierWorker

attr_reader probe_notification_builder: untyped
attr_reader probe_notification_builder: ProbeNotificationBuilder

attr_reader probe_manager: untyped
attr_reader probe_manager: ProbeManager

attr_reader redactor: untyped
attr_reader redactor: Redactor

attr_reader serializer: untyped
def shutdown!: (?untyped? replacement) -> untyped
Expand Down
2 changes: 2 additions & 0 deletions sig/datadog/di/probe_notification_builder.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ module Datadog

def build_emitting: (Probe probe) -> Hash[Symbol,untyped]

def build_errored: (Probe probe, Exception exception) -> Hash[Symbol,untyped]

def build_executed: (Probe probe, ?trace_point: TracePoint, ?rv: untyped?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped]

def build_snapshot: (Probe probe, ?rv: untyped?, ?snapshot: untyped?, ?path: String?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped]
Expand Down
3 changes: 3 additions & 0 deletions sig/datadog/di/probe_notifier_worker.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ module Datadog
def start: () -> void
def stop: (?::Integer timeout) -> void
def flush: () -> void

def add_status: (Hash[Symbol, untyped]) -> void
def add_snapshot: (Hash[Symbol, untyped]) -> void

private

Expand Down
6 changes: 6 additions & 0 deletions spec/datadog/core/configuration/components_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@
settings.dynamic_instrumentation.enabled = true
end

after do
# Shutdown DI if present because it creates a background thread.
# On JRuby DI is not present.
components.dynamic_instrumentation&.shutdown!
end

context 'MRI' do
before(:all) do
skip 'Test requires MRI' if PlatformHelpers.jruby?
Expand Down
98 changes: 87 additions & 11 deletions spec/datadog/di/integration/everything_from_remote_config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,25 @@ def target_method
}
end

let(:expected_errored_payload) do
{
path: '/debugger/v1/diagnostics',
ddsource: 'dd_debugger',
debugger: {
diagnostics: {
parentId: nil,
probeId: '11',
probeVersion: 0,
runtimeId: LOWERCASE_UUID_REGEXP,
status: 'ERROR',
},
},
message: /Instrumentation for probe 11 failed: File matching probe path \(instrumentation_integration_test_class.rb\) was loaded and is not in code tracker registry:/,
service: 'rspec',
timestamp: Integer,
}
end

let(:expected_snapshot_payload) do
{
path: '/debugger/v1/input',
Expand Down Expand Up @@ -211,9 +230,11 @@ def target_method

let(:payloads) { [] }

def do_rc
def do_rc(expect_hook: true)
expect(probe_manager).to receive(:add_probe).and_call_original
expect(instrumenter).to receive(:hook_method).and_call_original
if expect_hook
expect(instrumenter).to receive(:hook_method).and_call_original
end
# Events can be batched, meaning +post+ could be called once or twice
# depending on how threads are scheduled by the VM.
expect(component.transport.send(:client)).to receive(:post).at_least(:once) do |env|
Expand Down Expand Up @@ -254,18 +275,29 @@ def do_rc
end
end

context 'method probe received matching a loaded class' do
def assert_received_and_installed
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2
def assert_received_and_installed
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2

received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)
received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)

installed_payload = payloads.shift
expect(installed_payload).to match(expected_installed_payload)
end
installed_payload = payloads.shift
expect(installed_payload).to match(expected_installed_payload)
end

def assert_received_and_errored
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2

received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)

installed_payload = payloads.shift
expect(installed_payload).to match(expected_errored_payload)
end

context 'method probe received matching a loaded class' do
let(:probe_spec) do
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {typeName: 'EverythingFromRemoteConfigSpecTestClass', methodName: 'target_method'}}
end
Expand Down Expand Up @@ -334,4 +366,48 @@ def assert_received_and_installed
end
end
end

context 'line probe' do
with_code_tracking

context 'line probe received targeting loaded code not in code tracker' do
let(:probe_spec) do
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {
sourceFile: 'instrumentation_integration_test_class.rb', lines: [22]
}}
end

before do
begin
Object.send(:remove_const, :InstrumentationIntegrationTestClass)
rescue
nil
end
# Files loaded via 'load' do not get added to $LOADED_FEATURES,
# use 'require'.
# Note that the other tests use 'load' because they want the
# code to always be loaded.
require_relative 'instrumentation_integration_test_class'
expect($LOADED_FEATURES.detect do |path|
File.basename(path) == 'instrumentation_integration_test_class.rb'
end).to be_truthy
component.code_tracker.clear

# We want the probe status to be reported, therefore need to
# disable exception propagation.
settings.dynamic_instrumentation.internal.propagate_all_exceptions = false
end

it 'instruments code and adds probe to installed list' do
expect_lazy_log_many(logger, :debug,
/received probe from RC:/,
/error processing probe configuration:.*File matching probe path.*was loaded and is not in code tracker registry/,)

do_rc(expect_hook: false)
assert_received_and_errored

expect(probe_manager.installed_probes.length).to eq 0
end
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,38 @@ def test_method
a = 21
password = 'password'
redacted = {b: 33, session: 'blah'}
# padding
# padding
# padding
# padding
a * 2 # line 10
end # line 11
# The following condition causes instrumentation trace point callback
# to be invoked multiple times in CircleCI on Ruby 3.0-3.2 and 3.4
#if true || password || redacted
if true
a * 2 # line 10
end
end # line 12

# padding
# padding
# padding

def test_method_with_block
array = [1]
array.each do |value|
value_copy = value
end # line 17
value
end # line 22
end

# padding
# padding
# padding

def test_method_with_conditional
if false
a = 1
else # line 23
else # line 32
a = 2
end # line 25
end # line 34
a
end

end # line 29
end # line 38

# Comment - line 31
# Comment - line 40
Loading

0 comments on commit 6cdf92e

Please sign in to comment.