Skip to content

Commit 441d005

Browse files
osdrama authored and markpollack committed
GH-3300 | Add max_completion_tokens to Azure OpenAI configuration options
Add maxCompletionTokens support for Azure OpenAI integration This commit implements comprehensive support for the maxCompletionTokens parameter in the Azure OpenAI integration to enable compatibility with GPT-5 and reasoning models (o1, o3, o4-mini series). Key changes: - Add maxCompletionTokens field to AzureOpenAiChatOptions with JSON serialization - Implement builder pattern support with mutual exclusivity validation - Add 'last-set-wins' validation logic that clears conflicting parameters with warnings - Enhance javadoc with model-specific usage guidance for reasoning vs non-reasoning models - Update documentation with parameter usage patterns and mutual exclusivity rules Testing improvements: - Add comprehensive unit tests for builder validation and mutual exclusivity scenarios - Update integration tests to test maxTokens and maxCompletionTokens separately - Fix test configuration to prevent default parameter conflicts - Verify end-to-end functionality with real Azure OpenAI API calls The implementation follows Azure OpenAI API constraints where maxTokens and maxCompletionTokens are mutually exclusive. The validation logic prevents API errors by automatically clearing the previously set parameter when both are configured, with clear warning messages to guide developers. This enables proper support for: - Reasoning models (o1, o3, o4-mini) that require maxCompletionTokens - Non-reasoning models (gpt-4o, gpt-3.5-turbo) that use maxTokens - Future model compatibility without breaking changes Signed-off-by: Oskar Drozda <[email protected]> Signed-off-by: Mark Pollack <[email protected]>
1 parent 528155a commit 441d005

File tree

5 files changed

+469
-19
lines changed

5 files changed

+469
-19
lines changed

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,11 @@ private ChatCompletionsOptions merge(ChatCompletionsOptions fromAzureOptions,
720720
mergedAzureOptions.setMaxTokens((fromAzureOptions.getMaxTokens() != null) ? fromAzureOptions.getMaxTokens()
721721
: toSpringAiOptions.getMaxTokens());
722722

723+
if (fromAzureOptions.getMaxCompletionTokens() != null || toSpringAiOptions.getMaxCompletionTokens() != null) {
724+
mergedAzureOptions.setMaxCompletionTokens((fromAzureOptions.getMaxCompletionTokens() != null)
725+
? fromAzureOptions.getMaxCompletionTokens() : toSpringAiOptions.getMaxCompletionTokens());
726+
}
727+
723728
mergedAzureOptions.setLogitBias(fromAzureOptions.getLogitBias() != null ? fromAzureOptions.getLogitBias()
724729
: toSpringAiOptions.getLogitBias());
725730

@@ -803,6 +808,10 @@ private ChatCompletionsOptions merge(AzureOpenAiChatOptions fromSpringAiOptions,
803808
mergedAzureOptions.setMaxTokens(fromSpringAiOptions.getMaxTokens());
804809
}
805810

811+
if (fromSpringAiOptions.getMaxCompletionTokens() != null) {
812+
mergedAzureOptions.setMaxCompletionTokens(fromSpringAiOptions.getMaxCompletionTokens());
813+
}
814+
806815
if (fromSpringAiOptions.getLogitBias() != null) {
807816
mergedAzureOptions.setLogitBias(fromSpringAiOptions.getLogitBias());
808817
}
@@ -894,6 +903,9 @@ private ChatCompletionsOptions copy(ChatCompletionsOptions fromOptions) {
894903
if (fromOptions.getMaxTokens() != null) {
895904
copyOptions.setMaxTokens(fromOptions.getMaxTokens());
896905
}
906+
if (fromOptions.getMaxCompletionTokens() != null) {
907+
copyOptions.setMaxCompletionTokens(fromOptions.getMaxCompletionTokens());
908+
}
897909
if (fromOptions.getLogitBias() != null) {
898910
copyOptions.setLogitBias(fromOptions.getLogitBias());
899911
}

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java

Lines changed: 121 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -32,6 +32,8 @@
3232
import com.fasterxml.jackson.annotation.JsonInclude.Include;
3333
import com.fasterxml.jackson.annotation.JsonProperty;
3434

35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
3537
import org.springframework.ai.model.tool.ToolCallingChatOptions;
3638
import org.springframework.ai.tool.ToolCallback;
3739
import org.springframework.lang.Nullable;
@@ -52,8 +54,26 @@
5254
@JsonInclude(Include.NON_NULL)
5355
public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
5456

57+
private static final Logger logger = LoggerFactory.getLogger(AzureOpenAiChatOptions.class);
58+
5559
/**
56-
* The maximum number of tokens to generate.
60+
* The maximum number of tokens to generate in the chat completion. The total length
61+
* of input tokens and generated tokens is limited by the model's context length.
62+
*
63+
* <p>
64+
* <strong>Model-specific usage:</strong>
65+
* </p>
66+
* <ul>
67+
* <li><strong>Use for non-reasoning models</strong> (e.g., gpt-4o,
68+
* gpt-3.5-turbo)</li>
69+
* <li><strong>Cannot be used with reasoning models</strong> (e.g., o1, o3, o4-mini
70+
* series)</li>
71+
* </ul>
72+
*
73+
* <p>
74+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together with
75+
* {@link #maxCompletionTokens}. Setting both will result in an API error.
76+
* </p>
5777
*/
5878
@JsonProperty("max_tokens")
5979
private Integer maxTokens;
@@ -167,6 +187,28 @@ public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
167187
@JsonProperty("top_log_probs")
168188
private Integer topLogProbs;
169189

190+
/**
191+
* An upper bound for the number of tokens that can be generated for a completion,
192+
* including visible output tokens and reasoning tokens.
193+
*
194+
* <p>
195+
* <strong>Model-specific usage:</strong>
196+
* </p>
197+
* <ul>
198+
* <li><strong>Required for reasoning models</strong> (e.g., o1, o3, o4-mini
199+
* series)</li>
200+
* <li><strong>Cannot be used with non-reasoning models</strong> (e.g., gpt-4o,
201+
* gpt-3.5-turbo)</li>
202+
* </ul>
203+
*
204+
* <p>
205+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together with
206+
* {@link #maxTokens}. Setting both will result in an API error.
207+
* </p>
208+
*/
209+
@JsonProperty("max_completion_tokens")
210+
private Integer maxCompletionTokens;
211+
170212
/*
171213
* If provided, the configuration options for available Azure OpenAI chat
172214
* enhancements.
@@ -266,6 +308,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
266308
.frequencyPenalty(fromOptions.getFrequencyPenalty() != null ? fromOptions.getFrequencyPenalty() : null)
267309
.logitBias(fromOptions.getLogitBias())
268310
.maxTokens(fromOptions.getMaxTokens())
311+
.maxCompletionTokens(fromOptions.getMaxCompletionTokens())
269312
.N(fromOptions.getN())
270313
.presencePenalty(fromOptions.getPresencePenalty() != null ? fromOptions.getPresencePenalty() : null)
271314
.stop(fromOptions.getStop() != null ? new ArrayList<>(fromOptions.getStop()) : null)
@@ -300,6 +343,14 @@ public void setMaxTokens(Integer maxTokens) {
300343
this.maxTokens = maxTokens;
301344
}
302345

346+
public Integer getMaxCompletionTokens() {
347+
return this.maxCompletionTokens;
348+
}
349+
350+
public void setMaxCompletionTokens(Integer maxCompletionTokens) {
351+
this.maxCompletionTokens = maxCompletionTokens;
352+
}
353+
303354
public Map<String, Integer> getLogitBias() {
304355
return this.logitBias;
305356
}
@@ -510,6 +561,7 @@ public boolean equals(Object o) {
510561
&& Objects.equals(this.enableStreamUsage, that.enableStreamUsage)
511562
&& Objects.equals(this.reasoningEffort, that.reasoningEffort)
512563
&& Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.maxTokens, that.maxTokens)
564+
&& Objects.equals(this.maxCompletionTokens, that.maxCompletionTokens)
513565
&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
514566
&& Objects.equals(this.presencePenalty, that.presencePenalty)
515567
&& Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topP, that.topP);
@@ -520,8 +572,8 @@ public int hashCode() {
520572
return Objects.hash(this.logitBias, this.user, this.n, this.stop, this.deploymentName, this.responseFormat,
521573
this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, this.seed, this.logprobs,
522574
this.topLogProbs, this.enhancements, this.streamOptions, this.reasoningEffort, this.enableStreamUsage,
523-
this.toolContext, this.maxTokens, this.frequencyPenalty, this.presencePenalty, this.temperature,
524-
this.topP);
575+
this.toolContext, this.maxTokens, this.maxCompletionTokens, this.frequencyPenalty, this.presencePenalty,
576+
this.temperature, this.topP);
525577
}
526578

527579
public static class Builder {
@@ -551,11 +603,76 @@ public Builder logitBias(Map<String, Integer> logitBias) {
551603
return this;
552604
}
553605

606+
/**
607+
* Sets the maximum number of tokens to generate in the chat completion. The total
608+
* length of input tokens and generated tokens is limited by the model's context
609+
* length.
610+
*
611+
* <p>
612+
* <strong>Model-specific usage:</strong>
613+
* </p>
614+
* <ul>
615+
* <li><strong>Use for non-reasoning models</strong> (e.g., gpt-4o,
616+
* gpt-3.5-turbo)</li>
617+
* <li><strong>Cannot be used with reasoning models</strong> (e.g., o1, o3,
618+
* o4-mini series)</li>
619+
* </ul>
620+
*
621+
* <p>
622+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together
623+
* with {@link #maxCompletionTokens(Integer)}. If both are set, the last one set
624+
* will be used and the other will be cleared with a warning.
625+
* </p>
626+
* @param maxTokens the maximum number of tokens to generate, or null to unset
627+
* @return this builder instance
628+
*/
554629
public Builder maxTokens(Integer maxTokens) {
630+
if (maxTokens != null && this.options.maxCompletionTokens != null) {
631+
logger
632+
.warn("Both maxTokens and maxCompletionTokens are set. Azure OpenAI API does not support setting both parameters simultaneously. "
633+
+ "The previously set maxCompletionTokens ({}) will be cleared and maxTokens ({}) will be used.",
634+
this.options.maxCompletionTokens, maxTokens);
635+
this.options.maxCompletionTokens = null;
636+
}
555637
this.options.maxTokens = maxTokens;
556638
return this;
557639
}
558640

641+
/**
642+
* Sets an upper bound for the number of tokens that can be generated for a
643+
* completion, including visible output tokens and reasoning tokens.
644+
*
645+
* <p>
646+
* <strong>Model-specific usage:</strong>
647+
* </p>
648+
* <ul>
649+
* <li><strong>Required for reasoning models</strong> (e.g., o1, o3, o4-mini
650+
* series)</li>
651+
* <li><strong>Cannot be used with non-reasoning models</strong> (e.g., gpt-4o,
652+
* gpt-3.5-turbo)</li>
653+
* </ul>
654+
*
655+
* <p>
656+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together
657+
* with {@link #maxTokens(Integer)}. If both are set, the last one set will be
658+
* used and the other will be cleared with a warning.
659+
* </p>
660+
* @param maxCompletionTokens the maximum number of completion tokens to generate,
661+
* or null to unset
662+
* @return this builder instance
663+
*/
664+
public Builder maxCompletionTokens(Integer maxCompletionTokens) {
665+
if (maxCompletionTokens != null && this.options.maxTokens != null) {
666+
logger
667+
.warn("Both maxTokens and maxCompletionTokens are set. Azure OpenAI API does not support setting both parameters simultaneously. "
668+
+ "The previously set maxTokens ({}) will be cleared and maxCompletionTokens ({}) will be used.",
669+
this.options.maxTokens, maxCompletionTokens);
670+
this.options.maxTokens = null;
671+
}
672+
this.options.maxCompletionTokens = maxCompletionTokens;
673+
return this;
674+
}
675+
559676
public Builder N(Integer n) {
560677
this.options.n = n;
561678
return this;

models/spring-ai-azure-openai/src/test/java/org/springframework/ai/azure/openai/AzureOpenAiChatModelIT.java

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,133 @@ void multiModalityImageResource() {
282282
assertThat(response).containsAnyOf("bananas", "apple", "bowl", "basket", "fruit stand");
283283
}
284284

285+
@Test
286+
void testMaxCompletionTokensBlocking() {
287+
// Test with a very low maxCompletionTokens to verify it limits the response
288+
String prompt = """
289+
Write a detailed essay about the history of artificial intelligence,
290+
including its origins, major milestones, key researchers, current applications,
291+
and future prospects. Make it comprehensive and detailed.
292+
""";
293+
294+
// @formatter:off
295+
ChatResponse response = ChatClient.create(this.chatModel).prompt()
296+
.options(AzureOpenAiChatOptions.builder()
297+
.deploymentName("gpt-4o")
298+
.maxCompletionTokens(50)
299+
.build())
300+
.user(prompt)
301+
.call()
302+
.chatResponse();
303+
// @formatter:on
304+
305+
String content = response.getResult().getOutput().getText();
306+
logger.info("Response with maxCompletionTokens=50: {}", content);
307+
308+
// Verify the response is limited and not empty
309+
assertThat(content).isNotEmpty();
310+
311+
// The response should be relatively short due to the 50 token limit
312+
// We can't test exact token count but can verify it's significantly shorter than
313+
// unlimited
314+
assertThat(content.length()).isLessThan(500); // Rough approximation for 50 tokens
315+
316+
// Verify usage metadata if available
317+
if (response.getMetadata() != null && response.getMetadata().getUsage() != null) {
318+
var usage = response.getMetadata().getUsage();
319+
logger.info("Token usage - Total: {}, Prompt: {}, Completion: {}", usage.getTotalTokens(),
320+
usage.getPromptTokens(), usage.getCompletionTokens());
321+
322+
// The completion tokens should be limited by maxCompletionTokens
323+
if (usage.getCompletionTokens() != null) {
324+
assertThat(usage.getCompletionTokens()).isLessThanOrEqualTo(50);
325+
}
326+
}
327+
}
328+
329+
@Test
330+
void testMaxCompletionTokensStreaming() {
331+
String prompt = """
332+
Write a detailed explanation of machine learning algorithms,
333+
covering supervised learning, unsupervised learning, and reinforcement learning.
334+
Include examples and applications for each type.
335+
""";
336+
337+
// @formatter:off
338+
String content = ChatClient.create(this.chatModel).prompt()
339+
.options(AzureOpenAiChatOptions.builder()
340+
.deploymentName("gpt-4o")
341+
.maxCompletionTokens(30)
342+
.build())
343+
.user(prompt)
344+
.stream()
345+
.content()
346+
.collectList()
347+
.block()
348+
.stream()
349+
.collect(Collectors.joining());
350+
// @formatter:on
351+
352+
logger.info("Streaming response with maxCompletionTokens=30: {}", content);
353+
354+
// Verify the response is limited and not empty
355+
assertThat(content).isNotEmpty();
356+
357+
// The response should be very short due to the 30 token limit
358+
assertThat(content.length()).isLessThan(300); // Rough approximation for 30 tokens
359+
}
360+
361+
@Test
362+
void testMaxCompletionTokensOptionsBuilder() {
363+
// Test that maxCompletionTokens can be set via builder and is properly retrieved
364+
AzureOpenAiChatOptions options = AzureOpenAiChatOptions.builder()
365+
.deploymentName("gpt-4o")
366+
.maxCompletionTokens(100)
367+
.temperature(0.7)
368+
.build();
369+
370+
assertThat(options.getMaxCompletionTokens()).isEqualTo(100);
371+
assertThat(options.getDeploymentName()).isEqualTo("gpt-4o");
372+
assertThat(options.getTemperature()).isEqualTo(0.7);
373+
}
374+
375+
@Test
376+
void testMaxTokensForNonReasoningModels() {
377+
// Test maxTokens parameter for non-reasoning models (e.g., gpt-4o)
378+
// maxTokens limits total tokens (input + output)
379+
String prompt = "Explain quantum computing in simple terms. Please provide a detailed explanation.";
380+
381+
// @formatter:off
382+
ChatResponse response = ChatClient.create(this.chatModel).prompt()
383+
.options(AzureOpenAiChatOptions.builder()
384+
.deploymentName("gpt-4o")
385+
.maxTokens(100) // Total tokens limit for non-reasoning models
386+
.build())
387+
.user(prompt)
388+
.call()
389+
.chatResponse();
390+
// @formatter:on
391+
392+
String content = response.getResult().getOutput().getText();
393+
logger.info("Response with maxTokens=100: {}", content);
394+
395+
assertThat(content).isNotEmpty();
396+
397+
// Verify usage metadata if available
398+
if (response.getMetadata() != null && response.getMetadata().getUsage() != null) {
399+
var usage = response.getMetadata().getUsage();
400+
logger.info("Token usage - Total: {}, Prompt: {}, Completion: {}", usage.getTotalTokens(),
401+
usage.getPromptTokens(), usage.getCompletionTokens());
402+
403+
// Total tokens should be close to maxTokens (Azure may slightly exceed the
404+
// limit)
405+
if (usage.getTotalTokens() != null) {
406+
assertThat(usage.getTotalTokens()).isLessThanOrEqualTo(150); // Allow some
407+
// tolerance
408+
}
409+
}
410+
}
411+
285412
record ActorsFilms(String actor, List<String> movies) {
286413

287414
}
@@ -306,7 +433,7 @@ public OpenAIClientBuilder openAIClientBuilder() {
306433
public AzureOpenAiChatModel azureOpenAiChatModel(OpenAIClientBuilder openAIClientBuilder) {
307434
return AzureOpenAiChatModel.builder()
308435
.openAIClientBuilder(openAIClientBuilder)
309-
.defaultOptions(AzureOpenAiChatOptions.builder().deploymentName("gpt-4o").maxTokens(1000).build())
436+
.defaultOptions(AzureOpenAiChatOptions.builder().deploymentName("gpt-4o").build())
310437
.build();
311438
}
312439

0 commit comments

Comments (0)