Skip to content

Commit e13fd30

Browse files
authored
fix(statsd): Measure time in nanos (#4495)
A lot of our instrumented code implicitly assumes sub-millisecond precision, but we round all timer metrics to milliseconds. This becomes a problem especially when computing the sum over a large number of sub-millisecond time frames (e.g. envelope buffer busy time).
1 parent bb90af7 commit e13fd30

File tree

3 files changed

+59
-24
lines changed

3 files changed

+59
-24
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
- Add data categories for LogItem and LogByte. ([#4455](https://github.com/getsentry/relay/pull/4455))
2525
- Add option to drop transaction attachments. ([#4466](https://github.com/getsentry/relay/pull/4466))
26+
- Add sub-millisecond precision to internal timer metrics. ([#4495](https://github.com/getsentry/relay/pull/4495))
2627

2728
## 25.1.0
2829

relay-server/src/middlewares/body_timing.rs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ mod tests {
170170
}
171171
}
172172

173+
fn captures_match(captures: Vec<String>, expected: &str) -> bool {
174+
let (head, tail) = expected.split_once("|h").unwrap();
175+
captures.len() == 1 && captures[0].starts_with(head) && captures[0].ends_with(tail)
176+
}
177+
173178
#[test]
174179
fn test_empty_body() {
175180
let captures = with_capturing_test_client(|| {
@@ -182,10 +187,10 @@ mod tests {
182187

183188
let _ = pinned.poll_frame(&mut cx);
184189
});
185-
assert_eq!(
190+
assert!(captures_match(
186191
captures,
187-
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:completed"]
188-
);
192+
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:completed"
193+
));
189194
}
190195

191196
#[test]
@@ -201,10 +206,10 @@ mod tests {
201206
let _ = pinned.as_mut().poll_frame(&mut cx);
202207
let _ = pinned.as_mut().poll_frame(&mut cx);
203208
});
204-
assert_eq!(
209+
assert!(captures_match(
205210
captures,
206-
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:completed"]
207-
);
211+
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:completed"
212+
));
208213
}
209214

210215
#[test]
@@ -219,10 +224,11 @@ mod tests {
219224

220225
let _ = pinned.poll_frame(&mut cx);
221226
});
222-
assert_eq!(
227+
228+
assert!(captures_match(
223229
captures,
224-
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:dropped"]
225-
)
230+
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:dropped"
231+
))
226232
}
227233

228234
#[test]
@@ -246,10 +252,10 @@ mod tests {
246252
let pinned = Pin::new(&mut timed_body);
247253
let _ = pinned.poll_frame(&mut cx);
248254
});
249-
assert_eq!(
255+
assert!(captures_match(
250256
captures,
251-
["requests.body_read.duration:0|ms|#route:unknown,size:lt1KB,status:failed"]
252-
)
257+
"requests.body_read.duration:0|h|#route:unknown,size:lt1KB,status:failed"
258+
))
253259
}
254260

255261
#[test]
@@ -266,10 +272,10 @@ mod tests {
266272
let mut pinned = Pin::new(&mut timed_body);
267273
while let Poll::Ready(Some(Ok(_))) = pinned.as_mut().poll_frame(&mut cx) {}
268274
});
269-
assert_eq!(
275+
assert!(captures_match(
270276
captures,
271-
["requests.body_read.duration:0|ms|#route:unknown,size:lt10KB,status:completed"]
272-
)
277+
"requests.body_read.duration:0|h|#route:unknown,size:lt10KB,status:completed"
278+
))
273279
}
274280

275281
#[test]

relay-statsd/src/lib.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,9 @@ macro_rules! metric {
604604
$crate::with_client(|client| {
605605
use $crate::_pred::*;
606606
client.send_metric(
607-
client.time_with_tags(&$crate::TimerMetric::name(&$id), $value)
607+
// NOTE: cadence histograms support Duration out of the box and converts it to nanos,
608+
// but we want milliseconds for historical reasons.
609+
client.histogram_with_tags(&$crate::TimerMetric::name(&$id), $value.as_nanos() as f64 / 1e6)
608610
$(.with_tag(stringify!($k), $v))*
609611
)
610612
})
@@ -614,22 +616,21 @@ macro_rules! metric {
614616
(timer($id:expr), $($k:ident = $v:expr,)* $block:block) => {{
615617
let now = std::time::Instant::now();
616618
let rv = {$block};
617-
$crate::with_client(|client| {
618-
use $crate::_pred::*;
619-
client.send_metric(
620-
client.time_with_tags(&$crate::TimerMetric::name(&$id), now.elapsed())
621-
$(.with_tag(stringify!($k), $v))*
622-
)
623-
});
619+
$crate::metric!(timer($id) = now.elapsed() $(, $k = $v)*);
624620
rv
625621
}};
626622
}
627623

628624
#[cfg(test)]
629625
mod tests {
626+
use std::time::Duration;
627+
630628
use cadence::{NopMetricSink, StatsdClient};
631629

632-
use crate::{set_client, with_capturing_test_client, with_client, GaugeMetric, MetricsClient};
630+
use crate::{
631+
set_client, with_capturing_test_client, with_client, GaugeMetric, MetricsClient,
632+
TimerMetric,
633+
};
633634

634635
enum TestGauges {
635636
Foo,
@@ -683,4 +684,31 @@ mod tests {
683684
// After setting the global client,the current client must change:
684685
assert_ne!(client1, client2);
685686
}
687+
688+
struct TestTimer;
689+
690+
impl TimerMetric for TestTimer {
691+
fn name(&self) -> &'static str {
692+
"timer"
693+
}
694+
}
695+
696+
#[test]
697+
fn nanos_rounding_error() {
698+
let one_day = Duration::from_secs(60 * 60 * 24);
699+
let captures = with_capturing_test_client(|| {
700+
metric!(timer(TestTimer) = one_day + Duration::from_nanos(1),);
701+
});
702+
703+
// for "short" durations, precision is preserved:
704+
assert_eq!(captures, ["timer:86400000.000001|h"]); // h is for histogram, not hours
705+
706+
let one_year = Duration::from_secs(60 * 60 * 24 * 365);
707+
let captures = with_capturing_test_client(|| {
708+
metric!(timer(TestTimer) = one_year + Duration::from_nanos(1),);
709+
});
710+
711+
// for very long durations, precision is lost:
712+
assert_eq!(captures, ["timer:31536000000|h"]);
713+
}
686714
}

0 commit comments

Comments
 (0)