From 97f9875f050c4d6d6ce820ee5e2882724bcc59ae Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 7 Feb 2025 16:34:37 -0800
Subject: [PATCH] fix(azure.py): add timeout param + elapsed time in azure
 timeout error

---
 litellm/llms/azure/azure.py           | 14 +++++++++++---
 litellm/proxy/_new_secret_config.yaml |  6 ++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py
index 91f2835e2d43..5294bd714127 100644
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@@ -5,7 +5,7 @@
 from typing import Any, Callable, Dict, List, Literal, Optional, Union
 
 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI
 
 import litellm
 from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ async def make_azure_openai_chat_completion_request(
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = await azure_client.chat.completions.with_raw_response.create(
                 **data, timeout=timeout
@@ -313,6 +314,11 @@
             headers = dict(raw_response.headers)
             response = raw_response.parse()
             return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e
@@ -642,6 +648,7 @@ async def acompletion(
             )
             raise AzureOpenAIError(status_code=500, message=str(e))
         except Exception as e:
+            message = getattr(e, "message", str(e))
             ## LOGGING
             logging_obj.post_call(
                 input=data["messages"],
@@ -652,7 +659,7 @@ async def acompletion(
             if hasattr(e, "status_code"):
                 raise e
             else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)
 
     def streaming(
         self,
@@ -797,10 +804,11 @@ async def async_streaming(
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
             raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
             )
 
     async def aembedding(
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 1a434666197f..ca9d8751637c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -5,6 +5,12 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
   - model_name: o3-mini
     litellm_params:
       model: o3-mini