Skip to content

Commit f651d51

Browse files
Litellm dev 02 07 2025 p2 (#8377)
* fix(caching_routes.py): mask redis password on `/cache/ping` route * fix(caching_routes.py): fix linting error * fix(caching_routes.py): fix linting error on caching routes * fix: fix test - ignore mask_dict - has a breakpoint * fix(azure.py): add timeout param + elapsed time in azure timeout error * fix(http_handler.py): add elapsed time to http timeout request — makes it easier to debug how long a request took before failing
1 parent 5a42be4 commit f651d51

File tree

7 files changed

+126
-25
lines changed

7 files changed

+126
-25
lines changed

Diff for: litellm/litellm_core_utils/sensitive_data_masker.py

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from typing import Any, Dict, Optional, Set


class SensitiveDataMasker:
    """Mask the values of sensitive-looking keys in (possibly nested) dicts.

    A key is considered sensitive when its lowercased name contains any of
    the configured substring patterns (e.g. "password", "key", "token").
    Matching values are replaced with a partially-masked string that keeps a
    short visible prefix and suffix.
    """

    def __init__(
        self,
        sensitive_patterns: Optional[Set[str]] = None,
        visible_prefix: int = 4,
        visible_suffix: int = 4,
        mask_char: str = "*",
    ):
        # Substrings that mark a key as sensitive (matched case-insensitively).
        self.sensitive_patterns = sensitive_patterns or {
            "password",
            "secret",
            "key",
            "token",
            "auth",
            "credential",
            "access",
            "private",
            "certificate",
        }

        self.visible_prefix = visible_prefix
        self.visible_suffix = visible_suffix
        self.mask_char = mask_char

    def _mask_value(self, value: str) -> str:
        """Mask the middle of *value*, keeping the prefix/suffix visible.

        NOTE(review): values shorter than visible_prefix + visible_suffix
        (default 8 chars) are returned fully unmasked — confirm that this
        exposure is acceptable for short secrets.
        """
        if not value or len(str(value)) < (self.visible_prefix + self.visible_suffix):
            return value

        value_str = str(value)
        masked_length = len(value_str) - (self.visible_prefix + self.visible_suffix)
        return f"{value_str[:self.visible_prefix]}{self.mask_char * masked_length}{value_str[-self.visible_suffix:]}"

    def is_sensitive_key(self, key: str) -> bool:
        """Return True if *key* contains any configured sensitive pattern."""
        key_lower = str(key).lower()
        return any(pattern in key_lower for pattern in self.sensitive_patterns)

    def mask_dict(
        self, data: Dict[str, Any], depth: int = 0, max_depth: int = 10
    ) -> Dict[str, Any]:
        """Return a copy of *data* with sensitive values masked.

        Recurses into nested dicts and plain objects (via ``vars()``) up to
        *max_depth* levels; anything at or beyond the limit is returned
        unmasked as-is. Non-primitive leaf values are stringified; entries
        that fail to serialize become ``"<unable to serialize>"``.
        """
        if depth >= max_depth:
            return data

        masked_data: Dict[str, Any] = {}
        for k, v in data.items():
            try:
                if isinstance(v, dict):
                    # BUGFIX: forward max_depth so a caller-supplied limit
                    # applies to nested levels (previously reset to the
                    # default 10 on every recursive call).
                    masked_data[k] = self.mask_dict(v, depth + 1, max_depth=max_depth)
                elif hasattr(v, "__dict__") and not isinstance(v, type):
                    # Plain object: mask its attribute dict instead.
                    masked_data[k] = self.mask_dict(
                        vars(v), depth + 1, max_depth=max_depth
                    )
                elif self.is_sensitive_key(k):
                    str_value = str(v) if v is not None else ""
                    masked_data[k] = self._mask_value(str_value)
                else:
                    # Keep primitives; stringify everything else so the
                    # result stays JSON-serializable.
                    masked_data[k] = (
                        v if isinstance(v, (int, float, bool, str)) else str(v)
                    )
            except Exception:
                masked_data[k] = "<unable to serialize>"

        return masked_data


# Usage example:
"""
masker = SensitiveDataMasker()
data = {
    "api_key": "sk-1234567890abcdef",
    "redis_password": "very_secret_pass",
    "port": 6379
}

masked = masker.mask_dict(data)
# Result: {
#   "api_key": "sk-1***********cdef",
#   "redis_password": "very********pass",
#   "port": 6379
# }
"""

Diff for: litellm/llms/azure/azure.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Any, Callable, Dict, List, Literal, Optional, Union
66

77
import httpx # type: ignore
8-
from openai import AsyncAzureOpenAI, AzureOpenAI
8+
from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI
99

1010
import litellm
1111
from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ async def make_azure_openai_chat_completion_request(
305305
- call chat.completions.create.with_raw_response when litellm.return_response_headers is True
306306
- call chat.completions.create by default
307307
"""
308+
start_time = time.time()
308309
try:
309310
raw_response = await azure_client.chat.completions.with_raw_response.create(
310311
**data, timeout=timeout
@@ -313,6 +314,11 @@ async def make_azure_openai_chat_completion_request(
313314
headers = dict(raw_response.headers)
314315
response = raw_response.parse()
315316
return headers, response
317+
except APITimeoutError as e:
318+
end_time = time.time()
319+
time_delta = round(end_time - start_time, 2)
320+
e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
321+
raise e
316322
except Exception as e:
317323
raise e
318324

@@ -642,6 +648,7 @@ async def acompletion(
642648
)
643649
raise AzureOpenAIError(status_code=500, message=str(e))
644650
except Exception as e:
651+
message = getattr(e, "message", str(e))
645652
## LOGGING
646653
logging_obj.post_call(
647654
input=data["messages"],
@@ -652,7 +659,7 @@ async def acompletion(
652659
if hasattr(e, "status_code"):
653660
raise e
654661
else:
655-
raise AzureOpenAIError(status_code=500, message=str(e))
662+
raise AzureOpenAIError(status_code=500, message=message)
656663

657664
def streaming(
658665
self,
@@ -797,10 +804,11 @@ async def async_streaming(
797804
status_code = getattr(e, "status_code", 500)
798805
error_headers = getattr(e, "headers", None)
799806
error_response = getattr(e, "response", None)
807+
message = getattr(e, "message", str(e))
800808
if error_headers is None and error_response:
801809
error_headers = getattr(error_response, "headers", None)
802810
raise AzureOpenAIError(
803-
status_code=status_code, message=str(e), headers=error_headers
811+
status_code=status_code, message=message, headers=error_headers
804812
)
805813

806814
async def aembedding(

Diff for: litellm/llms/custom_httpx/http_handler.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import os
3+
import time
34
from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
45

56
import httpx
@@ -179,6 +180,7 @@ async def post(
179180
stream: bool = False,
180181
logging_obj: Optional[LiteLLMLoggingObject] = None,
181182
):
183+
start_time = time.time()
182184
try:
183185
if timeout is None:
184186
timeout = self.timeout
@@ -207,14 +209,16 @@ async def post(
207209
finally:
208210
await new_client.aclose()
209211
except httpx.TimeoutException as e:
212+
end_time = time.time()
213+
time_delta = round(end_time - start_time, 3)
210214
headers = {}
211215
error_response = getattr(e, "response", None)
212216
if error_response is not None:
213217
for key, value in error_response.headers.items():
214218
headers["response_headers-{}".format(key)] = value
215219

216220
raise litellm.Timeout(
217-
message=f"Connection timed out after {timeout} seconds.",
221+
message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
218222
model="default-model-name",
219223
llm_provider="litellm-httpx-handler",
220224
headers=headers,

Diff for: litellm/proxy/_experimental/out/onboarding.html

-1
This file was deleted.

Diff for: litellm/proxy/_new_secret_config.yaml

+15-14
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,20 @@ model_list:
55
- model_name: gpt-4
66
litellm_params:
77
model: gpt-3.5-turbo
8+
- model_name: azure-gpt-35-turbo
9+
litellm_params:
10+
model: azure/chatgpt-v-2
11+
api_key: os.environ/AZURE_API_KEY
12+
api_base: os.environ/AZURE_API_BASE
13+
timeout: 0.000000001
814
- model_name: o3-mini
915
litellm_params:
1016
model: o3-mini
1117
rpm: 3
1218
- model_name: anthropic-claude
1319
litellm_params:
1420
model: claude-3-5-haiku-20241022
15-
mock_response: Hi!
21+
timeout: 0.000000001
1622
- model_name: groq/*
1723
litellm_params:
1824
model: groq/*
@@ -28,16 +34,11 @@ model_list:
2834
api_key: fake-key
2935
api_base: https://exampleopenaiendpoint-production.up.railway.app/
3036

31-
general_settings:
32-
enable_jwt_auth: True
33-
litellm_jwtauth:
34-
team_id_jwt_field: "client_id"
35-
team_id_upsert: true
36-
scope_mappings:
37-
- scope: litellm.api.consumer
38-
models: ["anthropic-claude"]
39-
routes: ["/v1/chat/completions"]
40-
- scope: litellm.api.gpt_3_5_turbo
41-
models: ["gpt-3.5-turbo-testing"]
42-
enforce_scope_based_access: true
43-
enforce_rbac: true
37+
litellm_settings:
38+
cache: true
39+
40+
41+
router_settings:
42+
redis_host: os.environ/REDIS_HOST
43+
redis_password: os.environ/REDIS_PASSWORD
44+
redis_port: os.environ/REDIS_PORT

Diff for: litellm/proxy/caching_routes.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
import copy
1+
from typing import Any, Dict
22

33
from fastapi import APIRouter, Depends, HTTPException, Request
44

55
import litellm
66
from litellm._logging import verbose_proxy_logger
77
from litellm.caching.caching import RedisCache
8+
from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
89
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
910

11+
masker = SensitiveDataMasker()
12+
1013
router = APIRouter(
1114
prefix="/cache",
1215
tags=["caching"],
@@ -21,27 +24,30 @@ async def cache_ping():
2124
"""
2225
Endpoint for checking if cache can be pinged
2326
"""
24-
litellm_cache_params = {}
25-
specific_cache_params = {}
27+
litellm_cache_params: Dict[str, Any] = {}
28+
specific_cache_params: Dict[str, Any] = {}
2629
try:
2730

2831
if litellm.cache is None:
2932
raise HTTPException(
3033
status_code=503, detail="Cache not initialized. litellm.cache is None"
3134
)
32-
35+
litellm_cache_params = {}
36+
specific_cache_params = {}
3337
for k, v in vars(litellm.cache).items():
3438
try:
3539
if k == "cache":
3640
continue
37-
litellm_cache_params[k] = str(copy.deepcopy(v))
41+
litellm_cache_params[k] = v
3842
except Exception:
3943
litellm_cache_params[k] = "<unable to copy or convert>"
4044
for k, v in vars(litellm.cache.cache).items():
4145
try:
42-
specific_cache_params[k] = str(v)
46+
specific_cache_params[k] = v
4347
except Exception:
4448
specific_cache_params[k] = "<unable to copy or convert>"
49+
litellm_cache_params = masker.mask_dict(litellm_cache_params)
50+
specific_cache_params = masker.mask_dict(specific_cache_params)
4551
if litellm.cache.type == "redis":
4652
# ping the redis cache
4753
ping_response = await litellm.cache.ping()
@@ -56,6 +62,7 @@ async def cache_ping():
5662
messages=[{"role": "user", "content": "test from litellm"}],
5763
)
5864
verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
65+
5966
return {
6067
"status": "healthy",
6168
"cache_type": litellm.cache.type,

Diff for: tests/code_coverage_tests/recursive_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"add_object_type",
1414
"strip_field",
1515
"_transform_prompt",
16+
"mask_dict",
1617
]
1718

1819

0 commit comments

Comments
 (0)