From 97f9875f050c4d6d6ce820ee5e2882724bcc59ae Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 7 Feb 2025 16:34:37 -0800
Subject: [PATCH] fix(azure.py): add timeout param + elapsed time in azure
 timeout error

---
 litellm/llms/azure/azure.py           | 14 +++++++++++---
 litellm/proxy/_new_secret_config.yaml |  6 ++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py
index 91f2835e2d43..5294bd714127 100644
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@@ -5,7 +5,7 @@
 from typing import Any, Callable, Dict, List, Literal, Optional, Union
 
 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI
 
 import litellm
 from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ async def make_azure_openai_chat_completion_request(
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = await azure_client.chat.completions.with_raw_response.create(
                 **data, timeout=timeout
@@ -313,6 +314,11 @@
             headers = dict(raw_response.headers)
             response = raw_response.parse()
             return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e
@@ -642,6 +648,7 @@ async def acompletion(
             )
             raise AzureOpenAIError(status_code=500, message=str(e))
         except Exception as e:
+            message = getattr(e, "message", str(e))
             ## LOGGING
             logging_obj.post_call(
                 input=data["messages"],
@@ -652,7 +659,7 @@ async def acompletion(
             if hasattr(e, "status_code"):
                 raise e
             else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)
 
     def streaming(
         self,
@@ -797,10 +804,11 @@ async def async_streaming(
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
             raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
             )
 
     async def aembedding(
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 1a434666197f..ca9d8751637c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -5,6 +5,12 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
   - model_name: o3-mini
     litellm_params:
       model: o3-mini