Skip to content

Commit 441d005

Browse files
osdrama authored and markpollack committed
GH-3300 | Add max_completion_tokens to Azure OpenAI configuration options
Add maxCompletionTokens support for Azure OpenAI integration This commit implements comprehensive support for the maxCompletionTokens parameter in the Azure OpenAI integration to enable compatibility with GPT-5 and reasoning models (o1, o3, o4-mini series). Key changes: - Add maxCompletionTokens field to AzureOpenAiChatOptions with JSON serialization - Implement builder pattern support with mutual exclusivity validation - Add 'last-set-wins' validation logic that clears conflicting parameters with warnings - Enhance javadoc with model-specific usage guidance for reasoning vs non-reasoning models - Update documentation with parameter usage patterns and mutual exclusivity rules Testing improvements: - Add comprehensive unit tests for builder validation and mutual exclusivity scenarios - Update integration tests to test maxTokens and maxCompletionTokens separately - Fix test configuration to prevent default parameter conflicts - Verify end-to-end functionality with real Azure OpenAI API calls The implementation follows Azure OpenAI API constraints where maxTokens and maxCompletionTokens are mutually exclusive. The validation logic prevents API errors by automatically clearing the previously set parameter when both are configured, with clear warning messages to guide developers. This enables proper support for: - Reasoning models (o1, o3, o4-mini) that require maxCompletionTokens - Non-reasoning models (gpt-4o, gpt-3.5-turbo) that use maxTokens - Future model compatibility without breaking changes Signed-off-by: Oskar Drozda <[email protected]> Signed-off-by: Mark Pollack <[email protected]>
1 parent 528155a commit 441d005

File tree

5 files changed

+469
-19
lines changed

5 files changed

+469
-19
lines changed

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,11 @@ private ChatCompletionsOptions merge(ChatCompletionsOptions fromAzureOptions,
720720
mergedAzureOptions.setMaxTokens((fromAzureOptions.getMaxTokens() != null) ? fromAzureOptions.getMaxTokens()
721721
: toSpringAiOptions.getMaxTokens());
722722

723+
if (fromAzureOptions.getMaxCompletionTokens() != null || toSpringAiOptions.getMaxCompletionTokens() != null) {
724+
mergedAzureOptions.setMaxCompletionTokens((fromAzureOptions.getMaxCompletionTokens() != null)
725+
? fromAzureOptions.getMaxCompletionTokens() : toSpringAiOptions.getMaxCompletionTokens());
726+
}
727+
723728
mergedAzureOptions.setLogitBias(fromAzureOptions.getLogitBias() != null ? fromAzureOptions.getLogitBias()
724729
: toSpringAiOptions.getLogitBias());
725730

@@ -803,6 +808,10 @@ private ChatCompletionsOptions merge(AzureOpenAiChatOptions fromSpringAiOptions,
803808
mergedAzureOptions.setMaxTokens(fromSpringAiOptions.getMaxTokens());
804809
}
805810

811+
if (fromSpringAiOptions.getMaxCompletionTokens() != null) {
812+
mergedAzureOptions.setMaxCompletionTokens(fromSpringAiOptions.getMaxCompletionTokens());
813+
}
814+
806815
if (fromSpringAiOptions.getLogitBias() != null) {
807816
mergedAzureOptions.setLogitBias(fromSpringAiOptions.getLogitBias());
808817
}
@@ -894,6 +903,9 @@ private ChatCompletionsOptions copy(ChatCompletionsOptions fromOptions) {
894903
if (fromOptions.getMaxTokens() != null) {
895904
copyOptions.setMaxTokens(fromOptions.getMaxTokens());
896905
}
906+
if (fromOptions.getMaxCompletionTokens() != null) {
907+
copyOptions.setMaxCompletionTokens(fromOptions.getMaxCompletionTokens());
908+
}
897909
if (fromOptions.getLogitBias() != null) {
898910
copyOptions.setLogitBias(fromOptions.getLogitBias());
899911
}

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java

Lines changed: 121 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -32,6 +32,8 @@
3232
import com.fasterxml.jackson.annotation.JsonInclude.Include;
3333
import com.fasterxml.jackson.annotation.JsonProperty;
3434

35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
3537
import org.springframework.ai.model.tool.ToolCallingChatOptions;
3638
import org.springframework.ai.tool.ToolCallback;
3739
import org.springframework.lang.Nullable;
@@ -52,8 +54,26 @@
5254
@JsonInclude(Include.NON_NULL)
5355
public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
5456

57+
private static final Logger logger = LoggerFactory.getLogger(AzureOpenAiChatOptions.class);
58+
5559
/**
56-
* The maximum number of tokens to generate.
60+
* The maximum number of tokens to generate in the chat completion. The total length
61+
* of input tokens and generated tokens is limited by the model's context length.
62+
*
63+
* <p>
64+
* <strong>Model-specific usage:</strong>
65+
* </p>
66+
* <ul>
67+
* <li><strong>Use for non-reasoning models</strong> (e.g., gpt-4o,
68+
* gpt-3.5-turbo)</li>
69+
* <li><strong>Cannot be used with reasoning models</strong> (e.g., o1, o3, o4-mini
70+
* series)</li>
71+
* </ul>
72+
*
73+
* <p>
74+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together with
75+
* {@link #maxCompletionTokens}. Setting both will result in an API error.
76+
* </p>
5777
*/
5878
@JsonProperty("max_tokens")
5979
private Integer maxTokens;
@@ -167,6 +187,28 @@ public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
167187
@JsonProperty("top_log_probs")
168188
private Integer topLogProbs;
169189

190+
/**
191+
* An upper bound for the number of tokens that can be generated for a completion,
192+
* including visible output tokens and reasoning tokens.
193+
*
194+
* <p>
195+
* <strong>Model-specific usage:</strong>
196+
* </p>
197+
* <ul>
198+
* <li><strong>Required for reasoning models</strong> (e.g., o1, o3, o4-mini
199+
* series)</li>
200+
* <li><strong>Cannot be used with non-reasoning models</strong> (e.g., gpt-4o,
201+
* gpt-3.5-turbo)</li>
202+
* </ul>
203+
*
204+
* <p>
205+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together with
206+
* {@link #maxTokens}. Setting both will result in an API error.
207+
* </p>
208+
*/
209+
@JsonProperty("max_completion_tokens")
210+
private Integer maxCompletionTokens;
211+
170212
/*
171213
* If provided, the configuration options for available Azure OpenAI chat
172214
* enhancements.
@@ -266,6 +308,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
266308
.frequencyPenalty(fromOptions.getFrequencyPenalty() != null ? fromOptions.getFrequencyPenalty() : null)
267309
.logitBias(fromOptions.getLogitBias())
268310
.maxTokens(fromOptions.getMaxTokens())
311+
.maxCompletionTokens(fromOptions.getMaxCompletionTokens())
269312
.N(fromOptions.getN())
270313
.presencePenalty(fromOptions.getPresencePenalty() != null ? fromOptions.getPresencePenalty() : null)
271314
.stop(fromOptions.getStop() != null ? new ArrayList<>(fromOptions.getStop()) : null)
@@ -300,6 +343,14 @@ public void setMaxTokens(Integer maxTokens) {
300343
this.maxTokens = maxTokens;
301344
}
302345

346+
public Integer getMaxCompletionTokens() {
347+
return this.maxCompletionTokens;
348+
}
349+
350+
public void setMaxCompletionTokens(Integer maxCompletionTokens) {
351+
this.maxCompletionTokens = maxCompletionTokens;
352+
}
353+
303354
public Map<String, Integer> getLogitBias() {
304355
return this.logitBias;
305356
}
@@ -510,6 +561,7 @@ public boolean equals(Object o) {
510561
&& Objects.equals(this.enableStreamUsage, that.enableStreamUsage)
511562
&& Objects.equals(this.reasoningEffort, that.reasoningEffort)
512563
&& Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.maxTokens, that.maxTokens)
564+
&& Objects.equals(this.maxCompletionTokens, that.maxCompletionTokens)
513565
&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
514566
&& Objects.equals(this.presencePenalty, that.presencePenalty)
515567
&& Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topP, that.topP);
@@ -520,8 +572,8 @@ public int hashCode() {
520572
return Objects.hash(this.logitBias, this.user, this.n, this.stop, this.deploymentName, this.responseFormat,
521573
this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, this.seed, this.logprobs,
522574
this.topLogProbs, this.enhancements, this.streamOptions, this.reasoningEffort, this.enableStreamUsage,
523-
this.toolContext, this.maxTokens, this.frequencyPenalty, this.presencePenalty, this.temperature,
524-
this.topP);
575+
this.toolContext, this.maxTokens, this.maxCompletionTokens, this.frequencyPenalty, this.presencePenalty,
576+
this.temperature, this.topP);
525577
}
526578

527579
public static class Builder {
@@ -551,11 +603,76 @@ public Builder logitBias(Map<String, Integer> logitBias) {
551603
return this;
552604
}
553605

606+
/**
607+
* Sets the maximum number of tokens to generate in the chat completion. The total
608+
* length of input tokens and generated tokens is limited by the model's context
609+
* length.
610+
*
611+
* <p>
612+
* <strong>Model-specific usage:</strong>
613+
* </p>
614+
* <ul>
615+
* <li><strong>Use for non-reasoning models</strong> (e.g., gpt-4o,
616+
* gpt-3.5-turbo)</li>
617+
* <li><strong>Cannot be used with reasoning models</strong> (e.g., o1, o3,
618+
* o4-mini series)</li>
619+
* </ul>
620+
*
621+
* <p>
622+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together
623+
* with {@link #maxCompletionTokens(Integer)}. If both are set, the last one set
624+
* will be used and the other will be cleared with a warning.
625+
* </p>
626+
* @param maxTokens the maximum number of tokens to generate, or null to unset
627+
* @return this builder instance
628+
*/
554629
public Builder maxTokens(Integer maxTokens) {
630+
if (maxTokens != null && this.options.maxCompletionTokens != null) {
631+
logger
632+
.warn("Both maxTokens and maxCompletionTokens are set. Azure OpenAI API does not support setting both parameters simultaneously. "
633+
+ "The previously set maxCompletionTokens ({}) will be cleared and maxTokens ({}) will be used.",
634+
this.options.maxCompletionTokens, maxTokens);
635+
this.options.maxCompletionTokens = null;
636+
}
555637
this.options.maxTokens = maxTokens;
556638
return this;
557639
}
558640

641+
/**
642+
* Sets an upper bound for the number of tokens that can be generated for a
643+
* completion, including visible output tokens and reasoning tokens.
644+
*
645+
* <p>
646+
* <strong>Model-specific usage:</strong>
647+
* </p>
648+
* <ul>
649+
* <li><strong>Required for reasoning models</strong> (e.g., o1, o3, o4-mini
650+
* series)</li>
651+
* <li><strong>Cannot be used with non-reasoning models</strong> (e.g., gpt-4o,
652+
* gpt-3.5-turbo)</li>
653+
* </ul>
654+
*
655+
* <p>
656+
* <strong>Mutual exclusivity:</strong> This parameter cannot be used together
657+
* with {@link #maxTokens(Integer)}. If both are set, the last one set will be
658+
* used and the other will be cleared with a warning.
659+
* </p>
660+
* @param maxCompletionTokens the maximum number of completion tokens to generate,
661+
* or null to unset
662+
* @return this builder instance
663+
*/
664+
public Builder maxCompletionTokens(Integer maxCompletionTokens) {
665+
if (maxCompletionTokens != null && this.options.maxTokens != null) {
666+
logger
667+
.warn("Both maxTokens and maxCompletionTokens are set. Azure OpenAI API does not support setting both parameters simultaneously. "
668+
+ "The previously set maxTokens ({}) will be cleared and maxCompletionTokens ({}) will be used.",
669+
this.options.maxTokens, maxCompletionTokens);
670+
this.options.maxTokens = null;
671+
}
672+
this.options.maxCompletionTokens = maxCompletionTokens;
673+
return this;
674+
}
675+
559676
public Builder N(Integer n) {
560677
this.options.n = n;
561678
return this;

models/spring-ai-azure-openai/src/test/java/org/springframework/ai/azure/openai/AzureOpenAiChatModelIT.java

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,133 @@ void multiModalityImageResource() {
282282
assertThat(response).containsAnyOf("bananas", "apple", "bowl", "basket", "fruit stand");
283283
}
284284

285+
@Test
286+
void testMaxCompletionTokensBlocking() {
287+
// Test with a very low maxCompletionTokens to verify it limits the response
288+
String prompt = """
289+
Write a detailed essay about the history of artificial intelligence,
290+
including its origins, major milestones, key researchers, current applications,
291+
and future prospects. Make it comprehensive and detailed.
292+
""";
293+
294+
// @formatter:off
295+
ChatResponse response = ChatClient.create(this.chatModel).prompt()
296+
.options(AzureOpenAiChatOptions.builder()
297+
.deploymentName("gpt-4o")
298+
.maxCompletionTokens(50)
299+
.build())
300+
.user(prompt)
301+
.call()
302+
.chatResponse();
303+
// @formatter:on
304+
305+
String content = response.getResult().getOutput().getText();
306+
logger.info("Response with maxCompletionTokens=50: {}", content);
307+
308+
// Verify the response is limited and not empty
309+
assertThat(content).isNotEmpty();
310+
311+
// The response should be relatively short due to the 50 token limit
312+
// We can't test exact token count but can verify it's significantly shorter than
313+
// unlimited
314+
assertThat(content.length()).isLessThan(500); // Rough approximation for 50 tokens
315+
316+
// Verify usage metadata if available
317+
if (response.getMetadata() != null && response.getMetadata().getUsage() != null) {
318+
var usage = response.getMetadata().getUsage();
319+
logger.info("Token usage - Total: {}, Prompt: {}, Completion: {}", usage.getTotalTokens(),
320+
usage.getPromptTokens(), usage.getCompletionTokens());
321+
322+
// The completion tokens should be limited by maxCompletionTokens
323+
if (usage.getCompletionTokens() != null) {
324+
assertThat(usage.getCompletionTokens()).isLessThanOrEqualTo(50);
325+
}
326+
}
327+
}
328+
329+
@Test
330+
void testMaxCompletionTokensStreaming() {
331+
String prompt = """
332+
Write a detailed explanation of machine learning algorithms,
333+
covering supervised learning, unsupervised learning, and reinforcement learning.
334+
Include examples and applications for each type.
335+
""";
336+
337+
// @formatter:off
338+
String content = ChatClient.create(this.chatModel).prompt()
339+
.options(AzureOpenAiChatOptions.builder()
340+
.deploymentName("gpt-4o")
341+
.maxCompletionTokens(30)
342+
.build())
343+
.user(prompt)
344+
.stream()
345+
.content()
346+
.collectList()
347+
.block()
348+
.stream()
349+
.collect(Collectors.joining());
350+
// @formatter:on
351+
352+
logger.info("Streaming response with maxCompletionTokens=30: {}", content);
353+
354+
// Verify the response is limited and not empty
355+
assertThat(content).isNotEmpty();
356+
357+
// The response should be very short due to the 30 token limit
358+
assertThat(content.length()).isLessThan(300); // Rough approximation for 30 tokens
359+
}
360+
361+
@Test
362+
void testMaxCompletionTokensOptionsBuilder() {
363+
// Test that maxCompletionTokens can be set via builder and is properly retrieved
364+
AzureOpenAiChatOptions options = AzureOpenAiChatOptions.builder()
365+
.deploymentName("gpt-4o")
366+
.maxCompletionTokens(100)
367+
.temperature(0.7)
368+
.build();
369+
370+
assertThat(options.getMaxCompletionTokens()).isEqualTo(100);
371+
assertThat(options.getDeploymentName()).isEqualTo("gpt-4o");
372+
assertThat(options.getTemperature()).isEqualTo(0.7);
373+
}
374+
375+
@Test
376+
void testMaxTokensForNonReasoningModels() {
377+
// Test maxTokens parameter for non-reasoning models (e.g., gpt-4o)
378+
// maxTokens limits total tokens (input + output)
379+
String prompt = "Explain quantum computing in simple terms. Please provide a detailed explanation.";
380+
381+
// @formatter:off
382+
ChatResponse response = ChatClient.create(this.chatModel).prompt()
383+
.options(AzureOpenAiChatOptions.builder()
384+
.deploymentName("gpt-4o")
385+
.maxTokens(100) // Total tokens limit for non-reasoning models
386+
.build())
387+
.user(prompt)
388+
.call()
389+
.chatResponse();
390+
// @formatter:on
391+
392+
String content = response.getResult().getOutput().getText();
393+
logger.info("Response with maxTokens=100: {}", content);
394+
395+
assertThat(content).isNotEmpty();
396+
397+
// Verify usage metadata if available
398+
if (response.getMetadata() != null && response.getMetadata().getUsage() != null) {
399+
var usage = response.getMetadata().getUsage();
400+
logger.info("Token usage - Total: {}, Prompt: {}, Completion: {}", usage.getTotalTokens(),
401+
usage.getPromptTokens(), usage.getCompletionTokens());
402+
403+
// Total tokens should be close to maxTokens (Azure may slightly exceed the
404+
// limit)
405+
if (usage.getTotalTokens() != null) {
406+
assertThat(usage.getTotalTokens()).isLessThanOrEqualTo(150); // Allow some
407+
// tolerance
408+
}
409+
}
410+
}
411+
285412
record ActorsFilms(String actor, List<String> movies) {
286413

287414
}
@@ -306,7 +433,7 @@ public OpenAIClientBuilder openAIClientBuilder() {
306433
public AzureOpenAiChatModel azureOpenAiChatModel(OpenAIClientBuilder openAIClientBuilder) {
307434
return AzureOpenAiChatModel.builder()
308435
.openAIClientBuilder(openAIClientBuilder)
309-
.defaultOptions(AzureOpenAiChatOptions.builder().deploymentName("gpt-4o").maxTokens(1000).build())
436+
.defaultOptions(AzureOpenAiChatOptions.builder().deploymentName("gpt-4o").build())
310437
.build();
311438
}
312439

0 commit comments

Comments (0)