Skip to content

Commit 1dd3713

Browse files
Anthropic Citations API Support (#8382)
* test(test_anthropic_completion.py): add test ensuring anthropic structured output response is consistent. Resolves #8291
* feat(anthropic.py): support citations api with new user document message format. Resolves #7970
* fix(anthropic/chat/transformation.py): return citations as a provider-specific-field. Resolves #7970
* feat(anthropic/chat/handler.py): add streaming citations support. Resolves #7970
* fix(handler.py): fix code qa error
* fix(handler.py): only set provider specific fields if non-empty dict
* docs(anthropic.md): add citations api to anthropic docs
1 parent b3de321 commit 1dd3713

File tree

9 files changed

+308
-21
lines changed

9 files changed

+308
-21
lines changed

Diff for: docs/my-website/docs/providers/anthropic.md

+100
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,106 @@ curl http://0.0.0.0:4000/v1/chat/completions \
987987
</TabItem>
988988
</Tabs>
989989

990+
## [BETA] Citations API
991+
992+
Pass `citations: {"enabled": true}` to Anthropic to get citations in your document responses.
993+
994+
Note: This interface is in BETA. If you have feedback on how citations should be returned, please [tell us here](https://github.com/BerriAI/litellm/issues/7970#issuecomment-2644437943)
995+
996+
<Tabs>
997+
<TabItem value="sdk" label="SDK">
998+
999+
```python
1000+
from litellm import completion
1001+
1002+
resp = completion(
1003+
model="claude-3-5-sonnet-20241022",
1004+
messages=[
1005+
{
1006+
"role": "user",
1007+
"content": [
1008+
{
1009+
"type": "document",
1010+
"source": {
1011+
"type": "text",
1012+
"media_type": "text/plain",
1013+
"data": "The grass is green. The sky is blue.",
1014+
},
1015+
"title": "My Document",
1016+
"context": "This is a trustworthy document.",
1017+
"citations": {"enabled": True},
1018+
},
1019+
{
1020+
"type": "text",
1021+
"text": "What color is the grass and sky?",
1022+
},
1023+
],
1024+
}
1025+
],
1026+
)
1027+
1028+
citations = resp.choices[0].message.provider_specific_fields["citations"]
1029+
1030+
assert citations is not None
1031+
```
1032+
1033+
</TabItem>
1034+
<TabItem value="proxy" label="PROXY">
1035+
1036+
1. Setup config.yaml
1037+
1038+
```yaml
1039+
model_list:
1040+
- model_name: anthropic-claude
1041+
litellm_params:
1042+
model: anthropic/claude-3-5-sonnet-20241022
1043+
api_key: os.environ/ANTHROPIC_API_KEY
1044+
```
1045+
1046+
2. Start proxy
1047+
1048+
```bash
1049+
litellm --config /path/to/config.yaml
1050+
1051+
# RUNNING on http://0.0.0.0:4000
1052+
```
1053+
1054+
3. Test it!
1055+
1056+
```bash
1057+
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
1058+
-H 'Content-Type: application/json' \
1059+
-H 'Authorization: Bearer sk-1234' \
1060+
-d '{
1061+
"model": "anthropic-claude",
1062+
"messages": [
1063+
{
1064+
"role": "user",
1065+
"content": [
1066+
{
1067+
"type": "document",
1068+
"source": {
1069+
"type": "text",
1070+
"media_type": "text/plain",
1071+
"data": "The grass is green. The sky is blue.",
1072+
},
1073+
"title": "My Document",
1074+
"context": "This is a trustworthy document.",
1075+
"citations": {"enabled": true},
1076+
},
1077+
{
1078+
"type": "text",
1079+
"text": "What color is the grass and sky?",
1080+
},
1081+
],
1082+
}
1083+
]
1084+
}'
1085+
```
1086+
1087+
</TabItem>
1088+
</Tabs>
1089+
9901090
## Usage - passing 'user_id' to Anthropic
9911091

9921092
LiteLLM translates the OpenAI `user` param to Anthropic's `metadata[user_id]` param.

Diff for: litellm/litellm_core_utils/prompt_templates/factory.py

+2
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,8 @@ def anthropic_messages_pt( # noqa: PLR0915
14211421
)
14221422

14231423
user_content.append(_content_element)
1424+
elif m.get("type", "") == "document":
1425+
user_content.append(cast(AnthropicMessagesDocumentParam, m))
14241426
elif isinstance(user_message_types_block["content"], str):
14251427
_anthropic_content_text_element: AnthropicMessagesTextParam = {
14261428
"type": "text",

Diff for: litellm/litellm_core_utils/streaming_handler.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,10 @@ def return_processed_chunk_logic( # noqa
809809
if self.sent_first_chunk is False:
810810
completion_obj["role"] = "assistant"
811811
self.sent_first_chunk = True
812-
812+
if response_obj.get("provider_specific_fields") is not None:
813+
completion_obj["provider_specific_fields"] = response_obj[
814+
"provider_specific_fields"
815+
]
813816
model_response.choices[0].delta = Delta(**completion_obj)
814817
_index: Optional[int] = completion_obj.get("index")
815818
if _index is not None:

Diff for: litellm/llms/anthropic/chat/handler.py

+31-15
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import copy
66
import json
7-
from typing import Any, Callable, List, Optional, Tuple, Union
7+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
88

99
import httpx # type: ignore
1010

@@ -506,6 +506,29 @@ def _handle_usage(
506506

507507
return usage_block
508508

509+
def _content_block_delta_helper(self, chunk: dict):
510+
text = ""
511+
tool_use: Optional[ChatCompletionToolCallChunk] = None
512+
provider_specific_fields = {}
513+
content_block = ContentBlockDelta(**chunk) # type: ignore
514+
self.content_blocks.append(content_block)
515+
if "text" in content_block["delta"]:
516+
text = content_block["delta"]["text"]
517+
elif "partial_json" in content_block["delta"]:
518+
tool_use = {
519+
"id": None,
520+
"type": "function",
521+
"function": {
522+
"name": None,
523+
"arguments": content_block["delta"]["partial_json"],
524+
},
525+
"index": self.tool_index,
526+
}
527+
elif "citation" in content_block["delta"]:
528+
provider_specific_fields["citation"] = content_block["delta"]["citation"]
529+
530+
return text, tool_use, provider_specific_fields
531+
509532
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
510533
try:
511534
type_chunk = chunk.get("type", "") or ""
@@ -515,27 +538,17 @@ def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
515538
is_finished = False
516539
finish_reason = ""
517540
usage: Optional[ChatCompletionUsageBlock] = None
541+
provider_specific_fields: Dict[str, Any] = {}
518542

519543
index = int(chunk.get("index", 0))
520544
if type_chunk == "content_block_delta":
521545
"""
522546
Anthropic content chunk
523547
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
524548
"""
525-
content_block = ContentBlockDelta(**chunk) # type: ignore
526-
self.content_blocks.append(content_block)
527-
if "text" in content_block["delta"]:
528-
text = content_block["delta"]["text"]
529-
elif "partial_json" in content_block["delta"]:
530-
tool_use = {
531-
"id": None,
532-
"type": "function",
533-
"function": {
534-
"name": None,
535-
"arguments": content_block["delta"]["partial_json"],
536-
},
537-
"index": self.tool_index,
538-
}
549+
text, tool_use, provider_specific_fields = (
550+
self._content_block_delta_helper(chunk=chunk)
551+
)
539552
elif type_chunk == "content_block_start":
540553
"""
541554
event: content_block_start
@@ -628,6 +641,9 @@ def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
628641
finish_reason=finish_reason,
629642
usage=usage,
630643
index=index,
644+
provider_specific_fields=(
645+
provider_specific_fields if provider_specific_fields else None
646+
),
631647
)
632648

633649
return returned_chunk

Diff for: litellm/llms/anthropic/chat/transformation.py

+5
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,7 @@ def transform_response(
628628
)
629629
else:
630630
text_content = ""
631+
citations: List[Any] = []
631632
tool_calls: List[ChatCompletionToolCallChunk] = []
632633
for idx, content in enumerate(completion_response["content"]):
633634
if content["type"] == "text":
@@ -645,10 +646,14 @@ def transform_response(
645646
index=idx,
646647
)
647648
)
649+
## CITATIONS
650+
if content.get("citations", None) is not None:
651+
citations.append(content["citations"])
648652

649653
_message = litellm.Message(
650654
tool_calls=tool_calls,
651655
content=text_content or None,
656+
provider_specific_fields={"citations": citations},
652657
)
653658

654659
## HANDLE JSON MODE - anthropic returns single function call

Diff for: litellm/types/llms/anthropic.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,17 @@ class AnthropicMessagesImageParam(TypedDict, total=False):
9292
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
9393

9494

95+
class CitationsObject(TypedDict):
96+
enabled: bool
97+
98+
9599
class AnthropicMessagesDocumentParam(TypedDict, total=False):
96100
type: Required[Literal["document"]]
97101
source: Required[AnthropicContentParamSource]
98102
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
103+
title: str
104+
context: str
105+
citations: Optional[CitationsObject]
99106

100107

101108
class AnthropicMessagesToolResultContent(TypedDict):
@@ -173,6 +180,11 @@ class ContentTextBlockDelta(TypedDict):
173180
text: str
174181

175182

183+
class ContentCitationsBlockDelta(TypedDict):
184+
type: Literal["citations"]
185+
citation: dict
186+
187+
176188
class ContentJsonBlockDelta(TypedDict):
177189
"""
178190
"delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}
@@ -185,7 +197,9 @@ class ContentJsonBlockDelta(TypedDict):
185197
class ContentBlockDelta(TypedDict):
186198
type: Literal["content_block_delta"]
187199
index: int
188-
delta: Union[ContentTextBlockDelta, ContentJsonBlockDelta]
200+
delta: Union[
201+
ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
202+
]
189203

190204

191205
class ContentBlockStop(TypedDict):

Diff for: litellm/types/llms/openai.py

+20
Original file line numberDiff line numberDiff line change
@@ -382,10 +382,29 @@ class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
382382
pass
383383

384384

385+
class DocumentObject(TypedDict):
386+
type: Literal["text"]
387+
media_type: str
388+
data: str
389+
390+
391+
class CitationsObject(TypedDict):
392+
enabled: bool
393+
394+
395+
class ChatCompletionDocumentObject(TypedDict):
396+
type: Literal["document"]
397+
source: DocumentObject
398+
title: str
399+
context: str
400+
citations: Optional[CitationsObject]
401+
402+
385403
OpenAIMessageContentListBlock = Union[
386404
ChatCompletionTextObject,
387405
ChatCompletionImageObject,
388406
ChatCompletionAudioObject,
407+
ChatCompletionDocumentObject,
389408
]
390409

391410
OpenAIMessageContent = Union[
@@ -460,6 +479,7 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total
460479
"text",
461480
"image_url",
462481
"input_audio",
482+
"document",
463483
] # used for validating user messages. Prevent users from accidentally sending anthropic messages.
464484

465485
AllMessageValues = Union[

Diff for: litellm/types/utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ def __init__(
551551
):
552552
super(Delta, self).__init__(**params)
553553
provider_specific_fields: Dict[str, Any] = {}
554+
554555
if "reasoning_content" in params:
555556
provider_specific_fields["reasoning_content"] = params["reasoning_content"]
556557
setattr(self, "reasoning_content", params["reasoning_content"])

0 commit comments

Comments
 (0)