diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
index dacd21f426e3..def4c597f2c5 100644
--- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
+++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
@@ -3,7 +3,8 @@
import time
import traceback
import uuid
-from typing import Dict, Iterable, List, Literal, Optional, Union
+import re
+from typing import Dict, Iterable, List, Literal, Optional, Union, Tuple
import litellm
from litellm._logging import verbose_logger
@@ -220,6 +221,16 @@ def _handle_invalid_parallel_tool_calls(
# if there is a JSONDecodeError, return the original tool_calls
return tool_calls
+def _parse_content_for_reasoning(message_text: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+ if not message_text:
+ return None, None
+
+    reasoning_match = re.match(r"<think>(.*?)</think>(.*)", message_text, re.DOTALL)
+
+ if reasoning_match:
+ return reasoning_match.group(1), reasoning_match.group(2)
+
+ return None, message_text
class LiteLLMResponseObjectHandler:
@@ -432,8 +443,14 @@ def convert_to_model_response_object( # noqa: PLR0915
for field in choice["message"].keys():
if field not in message_keys:
provider_specific_fields[field] = choice["message"][field]
+
+ # Handle reasoning models that display `reasoning_content` within `content`
+ reasoning_content, content = _parse_content_for_reasoning(choice["message"].get("content", None))
+ if reasoning_content:
+ provider_specific_fields["reasoning_content"] = reasoning_content
+
message = Message(
- content=choice["message"].get("content", None),
+ content=content,
role=choice["message"]["role"] or "assistant",
function_call=choice["message"].get("function_call", None),
tool_calls=tool_calls,
diff --git a/litellm/llms/infinity/rerank/transformation.py b/litellm/llms/infinity/rerank/transformation.py
index 2d34e5299a22..f8bc02fe0113 100644
--- a/litellm/llms/infinity/rerank/transformation.py
+++ b/litellm/llms/infinity/rerank/transformation.py
@@ -20,6 +20,15 @@
class InfinityRerankConfig(CohereRerankConfig):
+ def get_complete_url(self, api_base: Optional[str], model: str) -> str:
+ if api_base is None:
+ raise ValueError("api_base is required for Infinity rerank")
+ # Remove trailing slashes and ensure clean base URL
+ api_base = api_base.rstrip("/")
+ if not api_base.endswith("/rerank"):
+ api_base = f"{api_base}/rerank"
+ return api_base
+
def validate_environment(
self,
headers: dict,
diff --git a/litellm/utils.py b/litellm/utils.py
index c89c3d5c9aae..7d73548f89ab 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -89,6 +89,7 @@
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
+ _parse_content_for_reasoning,
)
from litellm.litellm_core_utils.llm_response_utils.get_api_base import get_api_base
from litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt import (
diff --git a/tests/litellm_utils_tests/test_utils.py b/tests/litellm_utils_tests/test_utils.py
index 4a2f63b51dbf..75630c81d89a 100644
--- a/tests/litellm_utils_tests/test_utils.py
+++ b/tests/litellm_utils_tests/test_utils.py
@@ -864,6 +864,18 @@ def test_convert_model_response_object():
== '{"type":"error","error":{"type":"invalid_request_error","message":"Output blocked by content filtering policy"}}'
)
+@pytest.mark.parametrize(
+ "content, expected_reasoning, expected_content",
+ [
+ (None, None, None),
+        ("<think>I am thinking here</think>The sky is a canvas of blue", "I am thinking here", "The sky is a canvas of blue"),
+ ("I am a regular response", None, "I am a regular response"),
+
+ ]
+)
+def test_parse_content_for_reasoning(content, expected_reasoning, expected_content):
+ assert(litellm.utils._parse_content_for_reasoning(content) == (expected_reasoning, expected_content))
+
@pytest.mark.parametrize(
"model, expected_bool",
diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py
index e6741c4cbfaf..efb183bda09e 100644
--- a/tests/llm_translation/test_azure_ai.py
+++ b/tests/llm_translation/test_azure_ai.py
@@ -13,7 +13,7 @@
from litellm.llms.anthropic.chat import ModelResponseIterator
import httpx
import json
-from respx import MockRouter
+from litellm.llms.custom_httpx.http_handler import HTTPHandler
load_dotenv()
import io
@@ -184,3 +184,45 @@ def test_completion_azure_ai_command_r():
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
+
+def test_azure_deepseek_reasoning_content():
+ import json
+
+ client = HTTPHandler()
+
+ with patch.object(client, "post") as mock_post:
+ mock_response = MagicMock()
+
+ mock_response.text = json.dumps(
+ {
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": {
+                            "content": "<think>I am thinking here</think>\n\nThe sky is a canvas of blue",
+ "role": "assistant",
+ }
+ }
+ ],
+ }
+ )
+
+ mock_response.status_code = 200
+ # Add required response attributes
+ mock_response.headers = {"Content-Type": "application/json"}
+ mock_response.json = lambda: json.loads(mock_response.text)
+ mock_post.return_value = mock_response
+
+
+ response = litellm.completion(
+ model='azure_ai/deepseek-r1',
+ messages=[{"role": "user", "content": "Hello, world!"}],
+ api_base="https://litellm8397336933.services.ai.azure.com/models/chat/completions",
+ api_key="my-fake-api-key",
+ client=client
+ )
+
+ print(response)
+ assert(response.choices[0].message.reasoning_content == "I am thinking here")
+ assert(response.choices[0].message.content == "\n\nThe sky is a canvas of blue")
diff --git a/tests/llm_translation/test_infinity.py b/tests/llm_translation/test_infinity.py
index bab64a4da30b..a7c17bde400f 100644
--- a/tests/llm_translation/test_infinity.py
+++ b/tests/llm_translation/test_infinity.py
@@ -69,7 +69,7 @@ def return_val():
_url = mock_post.call_args.kwargs["url"]
print("Arguments passed to API=", args_to_api)
print("url = ", _url)
- assert _url == "https://api.infinity.ai/v1/rerank"
+ assert _url == "https://api.infinity.ai/rerank"
request_data = json.loads(args_to_api)
assert request_data["query"] == expected_payload["query"]
@@ -133,7 +133,7 @@ def return_val():
_url = mock_post.call_args.kwargs["url"]
print("Arguments passed to API=", args_to_api)
print("url = ", _url)
- assert _url == "https://env.infinity.ai/v1/rerank"
+ assert _url == "https://env.infinity.ai/rerank"
request_data = json.loads(args_to_api)
assert request_data["query"] == expected_payload["query"]