fix(completion/handler.py): fix azure text async completion + streaming - pass max_retries as an explicit argument instead of popping it from the request data

krrishdholakia · krrishdholakia · commit be4c56a33506 · 2025-02-06T17:29:19.000-08:00
diff --git a/litellm/llms/azure/completion/handler.py b/litellm/llms/azure/completion/handler.py
@@ -131,6 +131,7 @@ def completion(  # noqa: PLR0915
                         timeout=timeout,
                         client=client,
                         logging_obj=logging_obj,
+                        max_retries=max_retries,
                     )
             elif "stream" in optional_params and optional_params["stream"] is True:
                 return self.streaming(
@@ -236,17 +237,12 @@ async def acompletion(
         timeout: Any,
         model_response: ModelResponse,
         logging_obj: Any,
+        max_retries: int,
         azure_ad_token: Optional[str] = None,
         client=None,  # this is the AsyncAzureOpenAI
     ):
         response = None
         try:
-            max_retries = data.pop("max_retries", 2)
-            if not isinstance(max_retries, int):
-                raise AzureOpenAIError(
-                    status_code=422, message="max retries must be an int"
-                )
-
             # init AzureOpenAI Client
             azure_client_params = {
                 "api_version": api_version,
diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
@@ -496,22 +496,31 @@ async def test_async_azure_max_retries_0(
 
 
 @pytest.mark.parametrize("max_retries", [0, 4])
+@pytest.mark.parametrize("stream", [True, False])
+@pytest.mark.parametrize("sync_mode", [True, False])
 @patch("litellm.llms.azure.completion.handler.select_azure_base_url_or_endpoint")
-def test_azure_instruct(mock_select_azure_base_url_or_endpoint, max_retries):
-    from litellm import completion
+@pytest.mark.asyncio
+async def test_azure_instruct(
+    mock_select_azure_base_url_or_endpoint, max_retries, stream, sync_mode
+):
+    from litellm import completion, acompletion
+
+    args = {
+        "model": "azure_text/instruct-model",
+        "messages": [
+            {"role": "user", "content": "What is the weather like in Boston?"}
+        ],
+        "max_tokens": 10,
+        "max_retries": max_retries,
+    }
 
     try:
-        response = completion(
-            model="azure_text/instruct-model",
-            messages=[
-                {"role": "user", "content": "What is the weather like in Boston?"}
-            ],
-            max_tokens=10,
-            max_retries=max_retries,
-        )
-        print("response", response)
-    except Exception as e:
-        print(e)
+        if sync_mode:
+            completion(**args)
+        else:
+            await acompletion(**args)
+    except Exception:
+        pass
 
     mock_select_azure_base_url_or_endpoint.assert_called_once()
     assert (