diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
index d8178ae3..cc4b6cb6 100644
--- a/src/agents/model_settings.py
+++ b/src/agents/model_settings.py
@@ -10,15 +10,34 @@ class ModelSettings:
 
     This class holds optional model configuration parameters (e.g. temperature,
     top_p, penalties, truncation, etc.).
+
+    Not all models/providers support all of these parameters, so please check the API documentation
+    for the specific model and provider you are using.
     """
 
     temperature: float | None = None
+    """The temperature to use when calling the model."""
+
     top_p: float | None = None
+    """The top_p to use when calling the model."""
+
     frequency_penalty: float | None = None
+    """The frequency penalty to use when calling the model."""
+
     presence_penalty: float | None = None
+    """The presence penalty to use when calling the model."""
+
     tool_choice: Literal["auto", "required", "none"] | str | None = None
+    """The tool choice to use when calling the model."""
+
     parallel_tool_calls: bool | None = False
+    """Whether to use parallel tool calls when calling the model."""
+
     truncation: Literal["auto", "disabled"] | None = None
+    """The truncation strategy to use when calling the model."""
+
+    max_tokens: int | None = None
+    """The maximum number of output tokens to generate."""
 
     def resolve(self, override: ModelSettings | None) -> ModelSettings:
         """Produce a new ModelSettings by overlaying any non-None values from the
@@ -33,4 +52,5 @@ def resolve(self, override: ModelSettings | None) -> ModelSettings:
             tool_choice=override.tool_choice or self.tool_choice,
             parallel_tool_calls=override.parallel_tool_calls or self.parallel_tool_calls,
             truncation=override.truncation or self.truncation,
+            max_tokens=override.max_tokens or self.max_tokens,
         )
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index c20f5bb9..50e27fc4 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -503,6 +503,7 @@ async def _fetch_response(
             top_p=self._non_null_or_not_given(model_settings.top_p),
             frequency_penalty=self._non_null_or_not_given(model_settings.frequency_penalty),
             presence_penalty=self._non_null_or_not_given(model_settings.presence_penalty),
+            max_tokens=self._non_null_or_not_given(model_settings.max_tokens),
             tool_choice=tool_choice,
             response_format=response_format,
             parallel_tool_calls=parallel_tool_calls,
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index e060fb8e..86c82153 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -235,6 +235,7 @@ async def _fetch_response(
             temperature=self._non_null_or_not_given(model_settings.temperature),
             top_p=self._non_null_or_not_given(model_settings.top_p),
             truncation=self._non_null_or_not_given(model_settings.truncation),
+            max_output_tokens=self._non_null_or_not_given(model_settings.max_tokens),
             tool_choice=tool_choice,
             parallel_tool_calls=parallel_tool_calls,
             stream=stream,
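
For reviewers, a minimal sketch (not part of the patch) of how the new field composes through ModelSettings.resolve(). The import path follows this diff's layout; the values are illustrative.

    # Illustrative only -- mirrors the resolve() semantics added above.
    from agents.model_settings import ModelSettings

    base = ModelSettings(temperature=0.3, max_tokens=1024)
    per_run = ModelSettings(max_tokens=256)

    resolved = base.resolve(per_run)
    assert resolved.max_tokens == 256    # set override fields win
    assert resolved.temperature == 0.3   # unset override fields fall back to base

Two details worth noting from the diff itself: the Responses client maps the field to the API's max_output_tokens parameter, while the Chat Completions client passes max_tokens through unchanged; and because resolve() combines fields with `or`, a falsy override (e.g. max_tokens=0) falls back to the base value rather than overriding it, consistent with the existing fields.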