Skip to content

Commit be4c56a

Browse files
fix(completion/handler.py): fix azure text async completion + streaming
1 parent 94aaf69 commit be4c56a

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

litellm/llms/azure/completion/handler.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,7 @@ def completion( # noqa: PLR0915
131131
timeout=timeout,
132132
client=client,
133133
logging_obj=logging_obj,
134+
max_retries=max_retries,
134135
)
135136
elif "stream" in optional_params and optional_params["stream"] is True:
136137
return self.streaming(
@@ -236,17 +237,12 @@ async def acompletion(
236237
timeout: Any,
237238
model_response: ModelResponse,
238239
logging_obj: Any,
240+
max_retries: int,
239241
azure_ad_token: Optional[str] = None,
240242
client=None, # this is the AsyncAzureOpenAI
241243
):
242244
response = None
243245
try:
244-
max_retries = data.pop("max_retries", 2)
245-
if not isinstance(max_retries, int):
246-
raise AzureOpenAIError(
247-
status_code=422, message="max retries must be an int"
248-
)
249-
250246
# init AzureOpenAI Client
251247
azure_client_params = {
252248
"api_version": api_version,

tests/llm_translation/test_azure_openai.py

+22 -13
Original file line number | Diff line number | Diff line change
@@ -496,22 +496,31 @@ async def test_async_azure_max_retries_0(
496496

497497

498498
@pytest.mark.parametrize("max_retries", [0, 4])
499+
@pytest.mark.parametrize("stream", [True, False])
500+
@pytest.mark.parametrize("sync_mode", [True, False])
499501
@patch("litellm.llms.azure.completion.handler.select_azure_base_url_or_endpoint")
500-
def test_azure_instruct(mock_select_azure_base_url_or_endpoint, max_retries):
501-
from litellm import completion
502+
@pytest.mark.asyncio
503+
async def test_azure_instruct(
504+
mock_select_azure_base_url_or_endpoint, max_retries, stream, sync_mode
505+
):
506+
from litellm import completion, acompletion
507+
508+
args = {
509+
"model": "azure_text/instruct-model",
510+
"messages": [
511+
{"role": "user", "content": "What is the weather like in Boston?"}
512+
],
513+
"max_tokens": 10,
514+
"max_retries": max_retries,
515+
}
502516

503517
try:
504-
response = completion(
505-
model="azure_text/instruct-model",
506-
messages=[
507-
{"role": "user", "content": "What is the weather like in Boston?"}
508-
],
509-
max_tokens=10,
510-
max_retries=max_retries,
511-
)
512-
print("response", response)
513-
except Exception as e:
514-
print(e)
518+
if sync_mode:
519+
completion(**args)
520+
else:
521+
await acompletion(**args)
522+
except Exception:
523+
pass
515524

516525
mock_select_azure_base_url_or_endpoint.assert_called_once()
517526
assert (

0 commit comments

Comments
 (0)