|
| 1 | +=begin |
| 2 | +
|
| 3 | +This benchmark verifies that the rate limits used for dynamic instrumentation |
| 4 | +probes are attainable. |
| 5 | +
|
| 6 | +Each benchmark iteration performs as many operations as the rate limit permits - |
| 7 | +5000 for a basic probe and 1 for an enriched probe. If the benchmark |
| 8 | +produces a rate of fewer than 1 "iteration" per second, the rate limit is |
| 9 | +not being reached. A result of more than 1 "iteration" per second |
| 10 | +means the rate limit is being reached. |
| 11 | +
|
| 12 | +Note that the number of "iterations per second" reported by benchmark/ips |
| 13 | +does not reflect how many times the instrumentation creates a snapshot - |
| 14 | +there can be (and normally are) invocations of the target method that do not |
| 15 | +produce DI snapshots due to the rate limit, but these invocations are counted in |
| 16 | +the "iterations per second" reported by benchmark/ips. |
| 17 | +
|
| 18 | +The default dynamic instrumentation settings for the probe notifier worker |
| 19 | +(queue capacity of 100 and minimum send interval of 3 seconds) mean an |
| 20 | +effective rate limit of 30 snapshots per second for basic probes, |
| 21 | +which is shared across all probes in the application, which is significantly |
| 22 | +below the 5000 snapshots per second per probe that DI is theoretically |
| 23 | +supposed to achieve. However, to increase actual attainable snapshot rate |
| 24 | +to 5000/second, the probe notifier worker needs to be changed to send |
| 25 | +multiple network requests for a single queue processing run or be more |
| 26 | +aggressive in flushing the snapshots to the network when the queue is getting |
| 27 | +full. In either case care needs to be taken not to starve customer applications |
| 28 | +of CPU. |
| 29 | +
|
| 30 | +=end |
| 31 | + |
# Setting VALIDATE_BENCHMARK=true lets the usual RSpec test suite run this
# benchmark quickly, to validate that it has not bitrotted.
VALIDATE_BENCHMARK_MODE = ENV['VALIDATE_BENCHMARK'].eql?('true')

# Do nothing unless this file is executed directly or validation mode is on.
return if __FILE__ != $PROGRAM_NAME && !VALIDATE_BENCHMARK_MODE
| 36 | + |
| 37 | +require 'benchmark/ips' |
| 38 | +require 'datadog' |
| 39 | +require 'webrick' |
| 40 | + |
# Benchmark harness for dynamic instrumentation (DI) probes.
#
# It installs method and line probes targeting DISnapshotTarget#test_method
# and uses benchmark/ips to measure iterations in which the target is invoked
# as many times as the probe's rate limit. Snapshot submissions are counted
# by a local WEBrick server listening on port 8126.
class DISnapshotBenchmark
  # If we are validating the benchmark a single operation is sufficient.
  BASIC_RATE_LIMIT = VALIDATE_BENCHMARK_MODE ? 1 : 5000
  ENRICHED_RATE_LIMIT = 1

  # Enables DI, starts the fake snapshot-receiving server in a background
  # thread and loads the instrumentation target.
  def initialize
    Datadog::DI.activate_tracking!

    Datadog.configure do |c|
      c.remote.enabled = true
      c.dynamic_instrumentation.enabled = true
      c.dynamic_instrumentation.internal.development = true

      # Increase queue capacity and reduce min send interval
      # to be able to send more snapshots out.
      # The default settings will result in dropped snapshots
      # way before non-enriched probe rate limit is reached.
      c.dynamic_instrumentation.internal.snapshot_queue_capacity = 10000
      c.dynamic_instrumentation.internal.min_send_interval = 1
    end

    Thread.new do
      # If there is an actual Datadog agent running locally, the server
      # used in this benchmark will fail to start.
      # Using an actual Datadog agent instead of the fake server should not
      # affect the indication of whether the rate limit is reachable
      # since the agent shouldn't take longer to process than the fake
      # web server (and the agent should also run on another core),
      # however using a real agent would forego reports of the number of
      # snapshots submitted and their size.
      server.start
    end

    # The target is loaded only after DI tracking has been activated above.
    require_relative 'support/di_snapshot_target'
  end

  # Runs the four scenarios in sequence: method probe (basic, enriched)
  # followed by line probe (basic, enriched).
  #
  # @raise [RuntimeError] when a probe cannot be installed
  def run_benchmark
    run_scenario(
      'method probe - basic',
      failure_message: 'Failed to instrument method (without snapshot capture)',
      rate_limit: BASIC_RATE_LIMIT,
      type_name: 'DISnapshotTarget', method_name: 'test_method',
    )

    # Enriched (snapshot-capturing) probes are benchmarked with the
    # enriched rate limit of ENRICHED_RATE_LIMIT (1), not a raised one.
    run_scenario(
      'method probe - enriched',
      failure_message: 'Failed to instrument method (with snapshot capture)',
      rate_limit: ENRICHED_RATE_LIMIT,
      type_name: 'DISnapshotTarget', method_name: 'test_method',
      capture_snapshot: true,
    )

    run_scenario(
      'line probe - basic',
      failure_message: 'Failed to instrument line (without snapshot capture)',
      rate_limit: BASIC_RATE_LIMIT,
      file: 'di_snapshot_target.rb', line_no: 20,
      capture_snapshot: false,
    )

    run_scenario(
      'line probe - enriched',
      failure_message: 'Failed to instrument line (with snapshot capture)',
      rate_limit: ENRICHED_RATE_LIMIT,
      file: 'di_snapshot_target.rb', line_no: 20,
      capture_snapshot: true,
    )
  end

  private

  # Installs one probe, benchmarks the target with it, removes the probe and
  # prints how many snapshots (and bytes) the fake server received.
  #
  # @param label [String] name of the benchmark/ips report entry
  # @param failure_message [String] error raised when the probe cannot be installed
  # @param rate_limit [Integer] rate limit given to the probe; also the number
  #   of target invocations performed per benchmark iteration
  # @param probe_options [Hash] remaining keyword arguments passed to
  #   Datadog::DI::Probe.new (probe location and capture_snapshot flag)
  # @raise [RuntimeError] when probe_manager.add_probe returns a falsy value
  def run_scenario(label, failure_message:, rate_limit:, **probe_options)
    probe = Datadog::DI::Probe.new(
      id: 1, type: :log,
      rate_limit: rate_limit,
      **probe_options,
    )

    raise failure_message unless probe_manager.add_probe(probe)

    # Start counting from zero for this scenario.
    @received_snapshot_count = 0
    @received_snapshot_bytes = 0

    Benchmark.ips do |x|
      x.config(**benchmark_time)

      x.report(label) do
        rate_limit.times do
          DISnapshotTarget.new.test_method
        end
        Datadog::DI.component.probe_notifier_worker.flush
      end

      x.save! 'di-snapshot-results.json' unless VALIDATE_BENCHMARK_MODE
      x.compare!
    end

    # DI does not provide an API to remove a specific probe because
    # this functionality is currently not needed by the product.
    probe_manager.remove_other_probes([])

    puts "Received #{@received_snapshot_count} snapshots, #{@received_snapshot_bytes} bytes total"
  end

  # benchmark/ips timing options: a minimal run in validation mode,
  # otherwise 10 seconds of measurement after 2 seconds of warmup.
  def benchmark_time
    VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 }
  end

  def probe_manager
    Datadog::DI.component.probe_manager
  end

  # Builds a new WEBrick server on port 8126 that accepts the DI diagnostics
  # and input endpoints and tallies the snapshot payloads posted to the
  # input endpoint.
  #
  # @return [WEBrick::HTTPServer] a configured server that has not been started
  def server
    WEBrick::HTTPServer.new(
      Port: 8126,
    ).tap do |http_server|
      @received_snapshot_count = 0
      @received_snapshot_bytes = 0

      http_server.mount_proc('/debugger/v1/diagnostics') do |_req, _res|
        # This request is a multipart form post
      end

      http_server.mount_proc('/debugger/v1/input') do |req, _res|
        payload = JSON.parse(req.body)
        @received_snapshot_count += payload.length
        # bytesize rather than length so the reported total is really in bytes.
        @received_snapshot_bytes += req.body.bytesize
      end
    end
  end

  attr_reader :received_snapshot_count
end
| 252 | + |
# Print the pid of this benchmark process.
$stdout.puts "Current pid is #{Process.pid}"

# Build the benchmark and run it in the context of the instance.
benchmark = DISnapshotBenchmark.new
benchmark.instance_exec { run_benchmark }
0 commit comments