
Commit a288fae

mathislucka and claude authored

fix: Anthropic reports input tokens in first message delta (#2001)

* fix: Anthropic reports input tokens in first message delta

* fix: improve Anthropic streaming usage token collection

  - Extract first_chunk_meta variable for better readability and performance
  - Add explicit chunks check before accessing first chunk for safety
  - Combine input tokens from message_start and output tokens from message_delta

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

1 parent a65926e · commit a288fae
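For context on the user-visible effect: Anthropic's streaming API reports `input_tokens` in the initial `message_start` event and `output_tokens` in the trailing `message_delta` event, so reading usage from the last chunk alone left `prompt_tokens` unset for streamed replies. A hedged sketch of how the fix surfaces to callers; the model name and prompt are illustrative, and it assumes the integration's public `AnthropicChatGenerator` API plus an `ANTHROPIC_API_KEY` in the environment:

```python
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

# Illustrative model and prompt; any streamed Claude call behaves the same way.
generator = AnthropicChatGenerator(
    model="claude-3-sonnet-20240229",
    streaming_callback=lambda chunk: print(chunk.content, end="", flush=True),
)
result = generator.run(messages=[ChatMessage.from_user("Say hello in one word.")])
reply = result["replies"][0]

# With this fix, both token counts are populated for streamed responses;
# previously prompt_tokens came back as None because only the final
# message_delta chunk was inspected.
print(reply.meta["usage"])  # e.g. {"prompt_tokens": 25, "completion_tokens": 40, ...}
```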

2 files changed: +61 −3 lines changed

integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py

Lines changed: 25 additions & 1 deletion
```diff
@@ -414,7 +414,31 @@ def _convert_streaming_chunks_to_chat_message(
 
         # Update meta information
         last_chunk_meta = chunks[-1].meta
-        usage = self._get_openai_compatible_usage(last_chunk_meta)
+
+        # Combine usage from first chunk (input_tokens) and last chunk (output_tokens)
+        combined_usage = {}
+
+        # Get input tokens from first chunk (message_start)
+        if chunks:
+            first_chunk_meta = chunks[0].meta
+            if first_chunk_meta.get("type") == "message_start":
+                first_chunk_usage = first_chunk_meta.get("message", {}).get("usage", {})
+                if "input_tokens" in first_chunk_usage:
+                    combined_usage["input_tokens"] = first_chunk_usage["input_tokens"]
+
+        # Get output tokens from last chunk (message_delta)
+        last_chunk_usage = last_chunk_meta.get("usage", {})
+        if "output_tokens" in last_chunk_usage:
+            combined_usage["output_tokens"] = last_chunk_usage["output_tokens"]
+        elif "completion_tokens" in last_chunk_usage:
+            combined_usage["output_tokens"] = last_chunk_usage["completion_tokens"]
+
+        # Add any other usage fields from the last chunk
+        for key, value in last_chunk_usage.items():
+            if key not in combined_usage:
+                combined_usage[key] = value
+
+        usage = self._get_openai_compatible_usage({"usage": combined_usage})
         message._meta.update(
             {
                 "model": model,
```

integrations/anthropic/tests/test_chat_generator.py

Lines changed: 36 additions & 2 deletions
```diff
@@ -403,6 +403,23 @@ def test_convert_streaming_chunks_to_chat_message(self):
         """
         # Create a sequence of streaming chunks that simulate Anthropic's response
         chunks = [
+            # Message start with input tokens
+            StreamingChunk(
+                content="",
+                meta={
+                    "type": "message_start",
+                    "message": {
+                        "id": "msg_123",
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [],
+                        "model": "claude-3-sonnet",
+                        "stop_reason": None,
+                        "stop_sequence": None,
+                        "usage": {"input_tokens": 25, "output_tokens": 0},
+                    },
+                },
+            ),
             # Initial text content
             StreamingChunk(
                 content="",
@@ -479,7 +496,7 @@ def test_convert_streaming_chunks_to_chat_message(self):
         assert message._meta["model"] == "claude-3-sonnet"
         assert message._meta["index"] == 0
         assert message._meta["finish_reason"] == "tool_use"
-        assert message._meta["usage"] == {"completion_tokens": 40}
+        assert message._meta["usage"] == {"prompt_tokens": 25, "completion_tokens": 40}
 
     def test_convert_streaming_chunks_to_chat_message_malformed_json(self, caplog):
         """
@@ -558,6 +575,23 @@ def test_convert_streaming_chunks_to_chat_message_tool_call_with_empty_arguments
         Test converting streaming chunks with an empty tool call arguments
         """
         chunks = [
+            # Message start with input tokens
+            StreamingChunk(
+                content="",
+                meta={
+                    "type": "message_start",
+                    "message": {
+                        "id": "msg_456",
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [],
+                        "model": "claude-3-sonnet",
+                        "stop_reason": None,
+                        "stop_sequence": None,
+                        "usage": {"input_tokens": 50, "output_tokens": 0},
+                    },
+                },
+            ),
             StreamingChunk(
                 content="",
                 meta={
@@ -659,7 +693,7 @@ def test_convert_streaming_chunks_to_chat_message_tool_call_with_empty_arguments
             "cache_creation_input_tokens": None,
             "cache_read_input_tokens": None,
             "completion_tokens": 69,
-            "prompt_tokens": None,
+            "prompt_tokens": 50,
             "server_tool_use": None,
         }
```
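The updated assertions can be exercised in isolation; a hedged invocation sketch, assuming a checkout of the integrations repository with the test dependencies installed:

```python
# Assumed invocation: run only the streaming-conversion tests touched by
# this commit, from the repository root.
import pytest

pytest.main([
    "integrations/anthropic/tests/test_chat_generator.py",
    "-k", "convert_streaming_chunks_to_chat_message",
])
```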
