openai · rm-openai · Mar 12, 2025 · Mar 12, 2025
diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
@@ -10,15 +10,34 @@ class ModelSettings:
 
     This class holds optional model configuration parameters (e.g. temperature,
     top_p, penalties, truncation, etc.).
+
+    Not all models/providers support all of these parameters, so please check the API documentation
+    for the specific model and provider you are using.
     """
 
     temperature: float | None = None
+    """The temperature to use when calling the model."""
+
     top_p: float | None = None
+    """The top_p to use when calling the model."""
+
     frequency_penalty: float | None = None
+    """The frequency penalty to use when calling the model."""
+
     presence_penalty: float | None = None
+    """The presence penalty to use when calling the model."""
+
     tool_choice: Literal["auto", "required", "none"] | str | None = None
+    """The tool choice to use when calling the model."""
+
     parallel_tool_calls: bool | None = False
+    """Whether to use parallel tool calls when calling the model."""
+
     truncation: Literal["auto", "disabled"] | None = None
+    """The truncation strategy to use when calling the model."""
+
+    max_tokens: int | None = None
+    """The maximum number of output tokens to generate."""
 
     def resolve(self, override: ModelSettings | None) -> ModelSettings:
         """Produce a new ModelSettings by overlaying any non-None values from the
@@ -33,4 +52,5 @@ def resolve(self, override: ModelSettings | None) -> ModelSettings:
             tool_choice=override.tool_choice or self.tool_choice,
             parallel_tool_calls=override.parallel_tool_calls or self.parallel_tool_calls,
             truncation=override.truncation or self.truncation,
+            max_tokens=override.max_tokens or self.max_tokens,
         )
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
@@ -503,6 +503,7 @@ async def _fetch_response(
             top_p=self._non_null_or_not_given(model_settings.top_p),
             frequency_penalty=self._non_null_or_not_given(model_settings.frequency_penalty),
             presence_penalty=self._non_null_or_not_given(model_settings.presence_penalty),
+            max_tokens=self._non_null_or_not_given(model_settings.max_tokens),
             tool_choice=tool_choice,
             response_format=response_format,
             parallel_tool_calls=parallel_tool_calls,

diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
@@ -235,6 +235,7 @@ async def _fetch_response(
             temperature=self._non_null_or_not_given(model_settings.temperature),
             top_p=self._non_null_or_not_given(model_settings.top_p),
             truncation=self._non_null_or_not_given(model_settings.truncation),
+            max_output_tokens=self._non_null_or_not_given(model_settings.max_tokens),
             tool_choice=tool_choice,
             parallel_tool_calls=parallel_tool_calls,
             stream=stream,