Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEBUG-3317 report probe status errored if probe fails to instrument #4301

Merged
merged 14 commits into from
Jan 27, 2025
2 changes: 2 additions & 0 deletions lib/datadog/di/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def initialize(settings, agent_settings, logger, code_tracker: nil, telemetry: n
@agent_settings = agent_settings
@logger = logger
@telemetry = telemetry
@code_tracker = code_tracker
@redactor = Redactor.new(settings)
@serializer = Serializer.new(settings, redactor, telemetry: telemetry)
@instrumenter = Instrumenter.new(settings, serializer, logger, code_tracker: code_tracker, telemetry: telemetry)
Expand All @@ -90,6 +91,7 @@ def initialize(settings, agent_settings, logger, code_tracker: nil, telemetry: n
attr_reader :agent_settings
attr_reader :logger
attr_reader :telemetry
attr_reader :code_tracker
attr_reader :instrumenter
attr_reader :transport
attr_reader :probe_notifier_worker
Expand Down
6 changes: 6 additions & 0 deletions lib/datadog/di/probe_notification_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def build_emitting(probe)
status: 'EMITTING',)
end

def build_errored(probe, exc)
build_status(probe,
message: "Instrumentation for probe #{probe.id} failed: #{exc}",
status: 'ERROR',)
end

# Duration is in seconds.
def build_executed(probe,
trace_point: nil, rv: nil, duration: nil, caller_locations: nil,
Expand Down
31 changes: 26 additions & 5 deletions lib/datadog/di/remote.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,48 @@ def receivers(telemetry)
begin
probe_spec = parse_content(content)
probe = ProbeBuilder.build_from_remote_config(probe_spec)
payload = component.probe_notification_builder.build_received(probe)
component.probe_notifier_worker.add_status(payload)
probe_notification_builder = component.probe_notification_builder
payload = probe_notification_builder.build_received(probe)
probe_notifier_worker = component.probe_notifier_worker
probe_notifier_worker.add_status(payload)
component.logger.debug { "di: received probe from RC: #{probe.type} #{probe.location}" }

begin
# TODO test exception capture
probe_manager.add_probe(probe)
content.applied
rescue DI::Error::DITargetNotInRegistry => exc
component.telemetry&.report(exc, description: "Line probe is targeting a loaded file that is not in code tracker")

payload = probe_notification_builder.build_errored(probe, exc)
probe_notifier_worker.add_status(payload)

# If a probe fails to install, we will mark the content
# as errored. On subsequent remote configuration application
# attemps, probe manager will raise the "previously errored"
# exception and we'll rescue it here, again marking the
# content as errored but with a somewhat different exception
# message.
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
rescue => exc
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions

component.logger.debug { "di: unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception adding probe in DI remote receiver")

# TODO test this path
payload = probe_notification_builder.build_errored(probe, exc)
probe_notifier_worker.add_status(payload)

# If a probe fails to install, we will mark the content
# as errored. On subsequent remote configuration application
# attemps, probe manager will raise the "previously errored"
# exception and we'll rescue it here, again marking the
# content as errored but with a somewhat different exception
# message.
# TODO stack trace must be redacted or not sent at all
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}")
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
end

# Important: even if processing fails for this probe config,
Expand All @@ -84,7 +104,8 @@ def receivers(telemetry)
component.logger.debug { "di: unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception handling probe in DI remote receiver")

content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}")
# TODO assert content state (errored for this example)
content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}")
end
end
end
Expand Down
26 changes: 15 additions & 11 deletions sig/datadog/di/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@ module Datadog

@telemetry: untyped

@redactor: untyped
@redactor: Redactor

@serializer: untyped
@serializer: Serializer

@instrumenter: untyped
@instrumenter: Instrumenter

@code_tracker: CodeTracker

@transport: untyped

@probe_notifier_worker: untyped
@probe_notifier_worker: ProbeNotifierWorker

@probe_notification_builder: untyped
@probe_notification_builder: ProbeNotificationBuilder

@probe_manager: untyped
@probe_manager: ProbeManager

def self.build: (untyped settings, untyped agent_settings, Core::Logger logger, ?telemetry: untyped?) -> (nil | untyped)

Expand All @@ -38,17 +40,19 @@ module Datadog

attr_reader telemetry: untyped

attr_reader instrumenter: untyped
attr_reader code_tracker: CodeTracker

attr_reader instrumenter: Instrumenter

attr_reader transport: untyped

attr_reader probe_notifier_worker: untyped
attr_reader probe_notifier_worker: ProbeNotifierWorker

attr_reader probe_notification_builder: untyped
attr_reader probe_notification_builder: ProbeNotificationBuilder

attr_reader probe_manager: untyped
attr_reader probe_manager: ProbeManager

attr_reader redactor: untyped
attr_reader redactor: Redactor

attr_reader serializer: untyped
def shutdown!: (?untyped? replacement) -> untyped
Expand Down
2 changes: 2 additions & 0 deletions sig/datadog/di/probe_notification_builder.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ module Datadog

def build_emitting: (Probe probe) -> Hash[Symbol,untyped]

def build_errored: (Probe probe, Exception exception) -> Hash[Symbol,untyped]

def build_executed: (Probe probe, ?trace_point: TracePoint, ?rv: untyped?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped]

def build_snapshot: (Probe probe, ?rv: untyped?, ?snapshot: untyped?, ?path: String?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped]
Expand Down
3 changes: 3 additions & 0 deletions sig/datadog/di/probe_notifier_worker.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ module Datadog
def start: () -> void
def stop: (?::Integer timeout) -> void
def flush: () -> void

def add_status: (Hash[Symbol, untyped]) -> void
def add_snapshot: (Hash[Symbol, untyped]) -> void

private

Expand Down
6 changes: 6 additions & 0 deletions spec/datadog/core/configuration/components_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@
settings.dynamic_instrumentation.enabled = true
end

after do
# Shutdown DI if present because it creates a background thread.
# On JRuby DI is not present.
components.dynamic_instrumentation&.shutdown!
end

context 'MRI' do
before(:all) do
skip 'Test requires MRI' if PlatformHelpers.jruby?
Expand Down
98 changes: 87 additions & 11 deletions spec/datadog/di/integration/everything_from_remote_config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,25 @@ def target_method
}
end

let(:expected_errored_payload) do
{
path: '/debugger/v1/diagnostics',
ddsource: 'dd_debugger',
debugger: {
diagnostics: {
parentId: nil,
probeId: '11',
probeVersion: 0,
runtimeId: LOWERCASE_UUID_REGEXP,
status: 'ERROR',
},
},
message: /Instrumentation for probe 11 failed: File matching probe path \(instrumentation_integration_test_class.rb\) was loaded and is not in code tracker registry:/,
service: 'rspec',
timestamp: Integer,
}
end

let(:expected_snapshot_payload) do
{
path: '/debugger/v1/input',
Expand Down Expand Up @@ -211,9 +230,11 @@ def target_method

let(:payloads) { [] }

def do_rc
def do_rc(expect_hook: true)
expect(probe_manager).to receive(:add_probe).and_call_original
expect(instrumenter).to receive(:hook_method).and_call_original
if expect_hook
expect(instrumenter).to receive(:hook_method).and_call_original
end
# Events can be batched, meaning +post+ could be called once or twice
# depending on how threads are scheduled by the VM.
expect(component.transport.send(:client)).to receive(:post).at_least(:once) do |env|
Expand Down Expand Up @@ -254,18 +275,29 @@ def do_rc
end
end

context 'method probe received matching a loaded class' do
def assert_received_and_installed
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2
def assert_received_and_installed
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2

received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)
received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)

installed_payload = payloads.shift
expect(installed_payload).to match(expected_installed_payload)
end
installed_payload = payloads.shift
expect(installed_payload).to match(expected_installed_payload)
end

def assert_received_and_errored
expect(payloads).to be_a(Array)
expect(payloads.length).to eq 2

received_payload = payloads.shift
expect(received_payload).to match(expected_received_payload)

installed_payload = payloads.shift
expect(installed_payload).to match(expected_errored_payload)
end

context 'method probe received matching a loaded class' do
let(:probe_spec) do
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {typeName: 'EverythingFromRemoteConfigSpecTestClass', methodName: 'target_method'}}
end
Expand Down Expand Up @@ -334,4 +366,48 @@ def assert_received_and_installed
end
end
end

context 'line probe' do
with_code_tracking

context 'line probe received targeting loaded code not in code tracker' do
let(:probe_spec) do
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {
sourceFile: 'instrumentation_integration_test_class.rb', lines: [22]
}}
end

before do
begin
Object.send(:remove_const, :InstrumentationIntegrationTestClass)
rescue
nil
end
# Files loaded via 'load' do not get added to $LOADED_FEATURES,
# use 'require'.
# Note that the other tests use 'load' because they want the
# code to always be loaded.
require_relative 'instrumentation_integration_test_class'
expect($LOADED_FEATURES.detect do |path|
File.basename(path) == 'instrumentation_integration_test_class.rb'
end).to be_truthy
component.code_tracker.clear

# We want the probe status to be reported, therefore need to
# disable exception propagation.
settings.dynamic_instrumentation.internal.propagate_all_exceptions = false
end

it 'instruments code and adds probe to installed list' do
expect_lazy_log_many(logger, :debug,
/received probe from RC:/,
/error processing probe configuration:.*File matching probe path.*was loaded and is not in code tracker registry/,)

do_rc(expect_hook: false)
assert_received_and_errored

expect(probe_manager.installed_probes.length).to eq 0
end
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,38 @@ def test_method
a = 21
password = 'password'
redacted = {b: 33, session: 'blah'}
# padding
# padding
# padding
# padding
a * 2 # line 10
end # line 11
# The following condition causes instrumentation trace point callback
# to be invoked multiple times in CircleCI on Ruby 3.0-3.2 and 3.4
#if true || password || redacted
if true
a * 2 # line 10
end
end # line 12

# padding
# padding
# padding

def test_method_with_block
array = [1]
array.each do |value|
value_copy = value
end # line 17
value
end # line 22
end

# padding
# padding
# padding

def test_method_with_conditional
if false
a = 1
else # line 23
else # line 32
a = 2
end # line 25
end # line 34
a
end

end # line 29
end # line 38

# Comment - line 31
# Comment - line 40
Loading
Loading