Skip to content

Commit

Permalink
chore(llm-monitoring): Extract AI data from data instead of measurements (#3630)

Browse files Browse the repository at this point in the history

We need to start porting things over to data, since `measurements` is
being deprecated and will be removed from the APIs.

For backward compatibility, the lookup order is: data -> measurement -> metric.
  • Loading branch information
colin-sentry authored May 22, 2024
1 parent 1c1be35 commit b61cba2
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
**Internal**:

- Send microsecond precision timestamps. ([#3613](https://github.com/getsentry/relay/pull/3613))
- Pull AI token counts from the 'data' section as well. ([#3630](https://github.com/getsentry/relay/pull/3630))
- Map outcome reasons for dynamic sampling to reduced set of values. ([#3623](https://github.com/getsentry/relay/pull/3623))
- Extract status for spans. ([#3606](https://github.com/getsentry/relay/pull/3606))
- Forward `received_at` timestamp for buckets sent to Kafka. ([#3561](https://github.com/getsentry/relay/pull/3561))
Expand Down
128 changes: 104 additions & 24 deletions relay-event-normalization/src/normalize/span/tag_extraction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,20 +643,46 @@ pub fn extract_tags(
span_tags
}

/// Converts a numeric [`Value`] (`F64`, `I64`, or `U64`) into an `f64`.
///
/// Returns `None` when the value is absent or is not one of the numeric
/// variants (e.g. a string or boolean).
fn value_to_f64(val: Option<&Value>) -> Option<f64> {
    // Bail out early if there is no value at all.
    let value = val?;
    match value {
        Value::F64(f) => Some(*f),
        // Integer variants are widened to f64; values beyond 2^53 may lose
        // precision, matching the previous inline conversions.
        Value::I64(i) => Some(*i as f64),
        Value::U64(u) => Some(*u as f64),
        _ => None,
    }
}

/// Copies specific numeric values from span data to span measurements.
pub fn extract_measurements(span: &mut Span, is_mobile: bool) {
let Some(span_op) = span.op.as_str() else {
return;
};

if span_op.starts_with("ai.") {
if let Some(data) = span.data.value() {
for (field, key) in [
(&data.ai_total_tokens_used, "ai_total_tokens_used"),
(&data.ai_completion_tokens_used, "ai_completion_tokens_used"),
(&data.ai_prompt_tokens_used, "ai_prompt_tokens_used"),
] {
if let Some(value) = value_to_f64(field.value()) {
let measurements = span.measurements.get_or_insert_with(Default::default);
measurements.insert(
key.into(),
Measurement {
value: value.into(),
unit: MetricUnit::None.into(),
}
.into(),
);
}
}
}
}

if span_op.starts_with("cache.") {
if let Some(data) = span.data.value() {
if let Some(value) = match &data.cache_item_size.value() {
Some(Value::F64(f)) => Some(*f),
Some(Value::I64(i)) => Some(*i as f64),
Some(Value::U64(u)) => Some(*u as f64),
_ => None,
} {
if let Some(value) = value_to_f64(data.cache_item_size.value()) {
let measurements = span.measurements.get_or_insert_with(Default::default);
measurements.insert(
"cache.item_size".to_owned(),
Expand Down Expand Up @@ -686,12 +712,7 @@ pub fn extract_measurements(span: &mut Span, is_mobile: bool) {
"http.response_transfer_size",
),
] {
if let Some(value) = match field.value() {
Some(Value::F64(f)) => Some(*f),
Some(Value::I64(i)) => Some(*i as f64),
Some(Value::U64(u)) => Some(*u as f64),
_ => None,
} {
if let Some(value) = value_to_f64(field.value()) {
let measurements = span.measurements.get_or_insert_with(Default::default);
measurements.insert(
key.into(),
Expand Down Expand Up @@ -722,12 +743,7 @@ pub fn extract_measurements(span: &mut Span, is_mobile: bool) {
"messaging.message.body.size",
),
] {
if let Some(value) = match field.value() {
Some(Value::F64(f)) => Some(*f),
Some(Value::I64(i)) => Some(*i as f64),
Some(Value::U64(u)) => Some(*u as f64),
_ => None,
} {
if let Some(value) = value_to_f64(field.value()) {
let measurements = span.measurements.get_or_insert_with(Default::default);
measurements.insert(
key.into(),
Expand All @@ -754,12 +770,7 @@ pub fn extract_measurements(span: &mut Span, is_mobile: bool) {
MetricUnit::Duration(DurationUnit::Second),
),
] {
if let Some(value) = match field.value() {
Some(Value::F64(f)) => Some(*f),
Some(Value::I64(i)) => Some(*i as f64),
Some(Value::U64(u)) => Some(*u as f64),
_ => None,
} {
if let Some(value) = value_to_f64(field.value()) {
let measurements = span.measurements.get_or_insert_with(Default::default);
measurements.insert(
key.into(),
Expand Down Expand Up @@ -1463,6 +1474,75 @@ LIMIT 1
assert!(!tags_3.contains_key("raw_domain"));
}

// Verifies that AI token counts stored in span `data` are copied into span
// measurements (and that the AI pipeline group tag is computed) for spans
// with an `ai.*` op.
#[test]
fn test_ai_extraction() {
    // Span payload carrying token counts in `data` rather than `measurements`.
    let event_json = r#"
{
    "spans": [
        {
            "timestamp": 1694732408.3145,
            "start_timestamp": 1694732407.8367,
            "exclusive_time": 477.800131,
            "description": "OpenAI Chat Completion",
            "op": "ai.chat_completions.openai",
            "span_id": "97c0ef9770a02f9d",
            "parent_span_id": "9756d8d7b2b364ff",
            "trace_id": "77aeb1c16bb544a4a39b8d42944947a3",
            "data": {
                "ai.total_tokens.used": 300,
                "ai.completion_tokens.used": 200,
                "ai.prompt_tokens.used": 100,
                "ai.streaming": true,
                "ai.pipeline.name": "My AI pipeline"
            },
            "hash": "e2fae740cccd3781"
        }
    ]
}
"#;

    let mut event = Annotated::<Event>::from_json(event_json)
        .unwrap()
        .into_value()
        .unwrap();

    extract_span_tags_from_event(&mut event, 200);

    // Pull out the single span and inspect its tags and measurements.
    let spans = event.spans.value().unwrap();
    let span = spans.first().unwrap().value().unwrap();
    let sentry_tags = span.sentry_tags.value().unwrap();
    let measurements = span.measurements.value().unwrap();

    assert_eq!(
        sentry_tags.get("ai_pipeline_group").unwrap().as_str(),
        Some("68e6cafc5b68d276")
    );
    assert_debug_snapshot!(measurements, @r###"
    Measurements(
        {
            "ai_completion_tokens_used": Measurement {
                value: 200.0,
                unit: None,
            },
            "ai_prompt_tokens_used": Measurement {
                value: 100.0,
                unit: None,
            },
            "ai_total_tokens_used": Measurement {
                value: 300.0,
                unit: None,
            },
        },
    )
    "###);
}

#[test]
fn test_cache_extraction() {
let json = r#"
Expand Down
15 changes: 15 additions & 0 deletions relay-event-schema/src/protocol/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,18 @@ pub struct SpanData {
#[metastructure(field = "app_start_type")] // TODO: no dot?
pub app_start_type: Annotated<Value>,

/// The total tokens that were used by an LLM call
#[metastructure(field = "ai.total_tokens.used")]
pub ai_total_tokens_used: Annotated<Value>,

/// The input tokens used by an LLM call (usually cheaper than output tokens)
#[metastructure(field = "ai.prompt_tokens.used")]
pub ai_prompt_tokens_used: Annotated<Value>,

/// The output tokens used by an LLM call (the ones the LLM actually generated)
#[metastructure(field = "ai.completion_tokens.used")]
pub ai_completion_tokens_used: Annotated<Value>,

/// The client's browser name.
#[metastructure(field = "browser.name")]
pub browser_name: Annotated<String>,
Expand Down Expand Up @@ -604,6 +616,9 @@ mod tests {
insta::assert_debug_snapshot!(data, @r###"
SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: String(
"task.py",
Expand Down
3 changes: 3 additions & 0 deletions relay-event-schema/src/protocol/span/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ mod tests {
),
data: SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: "Chrome",
code_filepath: ~,
code_lineno: ~,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ expression: "(&event.value().unwrap().spans, metrics)"
profile_id: ~,
data: SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: ~,
code_lineno: ~,
Expand Down Expand Up @@ -395,6 +398,9 @@ expression: "(&event.value().unwrap().spans, metrics)"
app_start_type: String(
"cold",
),
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: ~,
code_lineno: ~,
Expand Down Expand Up @@ -491,6 +497,9 @@ expression: "(&event.value().unwrap().spans, metrics)"
app_start_type: String(
"cold",
),
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: ~,
code_lineno: ~,
Expand Down Expand Up @@ -633,6 +642,9 @@ expression: "(&event.value().unwrap().spans, metrics)"
profile_id: ~,
data: SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: ~,
code_lineno: ~,
Expand Down Expand Up @@ -729,6 +741,9 @@ expression: "(&event.value().unwrap().spans, metrics)"
profile_id: ~,
data: SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: ~,
code_filepath: ~,
code_lineno: ~,
Expand Down
3 changes: 3 additions & 0 deletions relay-spans/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,9 @@ mod tests {
),
data: SpanData {
app_start_type: ~,
ai_total_tokens_used: ~,
ai_prompt_tokens_used: ~,
ai_completion_tokens_used: ~,
browser_name: "Chrome",
code_filepath: ~,
code_lineno: ~,
Expand Down

0 comments on commit b61cba2

Please sign in to comment.