diff --git a/docs/agents.md b/docs/agents.md
index 1c314739..1e04f7e9 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f
 
 !!! note
 
-    If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop, where the LLM produces a tool call , and the tool result is sent to the LLM, and this infinite loops because the LLM is always forced to use a tool.
+    To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:
+
+    1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
+    2. When `tool_choice` is set to "required" AND there is only one tool available
+
+    This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
+
+    If you want the Agent to completely stop after a tool call (rather than continuing with auto mode), you can set [`Agent.tool_use_behavior="stop_on_first_tool"`] which will directly use the tool output as the final response without further LLM processing.
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
index 2849538d..df3db42f 100644
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import dataclasses
 import inspect
 from collections.abc import Awaitable
 from dataclasses import dataclass
@@ -47,10 +48,11 @@
 )
 from .lifecycle import RunHooks
 from .logger import logger
+from .model_settings import ModelSettings
 from .models.interface import ModelTracing
 from .run_context import RunContextWrapper, TContext
 from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool, FunctionToolResult
+from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
 from .tracing import (
     SpanError,
     Trace,
@@ -206,6 +208,29 @@ async def execute_tools_and_side_effects(
         new_step_items.extend([result.run_item for result in function_results])
         new_step_items.extend(computer_results)
 
+        # Reset tool_choice to "auto" after tool execution to prevent infinite loops
+        if processed_response.functions or processed_response.computer_actions:
+            tools = agent.tools
+
+            if (
+                run_config.model_settings and
+                cls._should_reset_tool_choice(run_config.model_settings, tools)
+            ):
+                # update the run_config model settings with a copy
+                new_run_config_settings = dataclasses.replace(
+                    run_config.model_settings,
+                    tool_choice="auto"
+                )
+                run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
+
+            if cls._should_reset_tool_choice(agent.model_settings, tools):
+                # Create a modified copy instead of modifying the original agent
+                new_model_settings = dataclasses.replace(
+                    agent.model_settings,
+                    tool_choice="auto"
+                )
+                agent = dataclasses.replace(agent, model_settings=new_model_settings)
+
         # Second, check if there are any handoffs
         if run_handoffs := processed_response.handoffs:
             return await cls.execute_handoffs(
@@ -296,6 +321,24 @@ async def execute_tools_and_side_effects(
             next_step=NextStepRunAgain(),
         )
 
+    @classmethod
+    def _should_reset_tool_choice(cls, model_settings: ModelSettings, tools: list[Tool]) -> bool:
+        if model_settings is None or model_settings.tool_choice is None:
+            return False
+
+        # for specific tool choices
+        if (
+            isinstance(model_settings.tool_choice, str) and
+            model_settings.tool_choice not in ["auto", "required", "none"]
+        ):
+            return True
+
+        # for one tool and required tool choice
+        if model_settings.tool_choice == "required":
+            return len(tools) == 1
+
+        return False
+
     @classmethod
     def process_model_response(
         cls,
diff --git a/tests/test_tool_choice_reset.py b/tests/test_tool_choice_reset.py
new file mode 100644
index 00000000..7dae6f63
--- /dev/null
+++ b/tests/test_tool_choice_reset.py
@@ -0,0 +1,161 @@
+import pytest
+
+from agents import Agent, ModelSettings, Runner, Tool
+from agents._run_impl import RunImpl
+
+from .fake_model import FakeModel
+from .test_responses import (
+    get_function_tool,
+    get_function_tool_call,
+    get_text_message,
+)
+
+
+class TestToolChoiceReset:
+
+    def test_should_reset_tool_choice_direct(self):
+        """
+        Test the _should_reset_tool_choice method directly with various inputs
+        to ensure it correctly identifies cases where reset is needed.
+        """
+        # Case 1: tool_choice = None should not reset
+        model_settings = ModelSettings(tool_choice=None)
+        tools1: list[Tool] = [get_function_tool("tool1")]
+        # Cast to list[Tool] to fix type checking issues
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 2: tool_choice = "auto" should not reset
+        model_settings = ModelSettings(tool_choice="auto")
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 3: tool_choice = "none" should not reset
+        model_settings = ModelSettings(tool_choice="none")
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 4: tool_choice = "required" with one tool should reset
+        model_settings = ModelSettings(tool_choice="required")
+        assert RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 5: tool_choice = "required" with multiple tools should not reset
+        model_settings = ModelSettings(tool_choice="required")
+        tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools2)
+
+        # Case 6: Specific tool choice should reset
+        model_settings = ModelSettings(tool_choice="specific_tool")
+        assert RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+    @pytest.mark.asyncio
+    async def test_required_tool_choice_with_multiple_runs(self):
+        """
+        Test scenario 1: When multiple runs are executed with tool_choice="required"
+        Ensure each run works correctly and doesn't get stuck in infinite loop
+        Also verify that tool_choice remains "required" between runs
+        """
+        # Set up our fake model with responses for two runs
+        fake_model = FakeModel()
+        fake_model.add_multiple_turn_outputs([
+            [get_text_message("First run response")],
+            [get_text_message("Second run response")]
+        ])
+
+        # Create agent with a custom tool and tool_choice="required"
+        custom_tool = get_function_tool("custom_tool")
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[custom_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+        )
+
+        # First run should work correctly and preserve tool_choice
+        result1 = await Runner.run(agent, "first run")
+        assert result1.final_output == "First run response"
+        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
+
+        # Second run should also work correctly with tool_choice still required
+        result2 = await Runner.run(agent, "second run")
+        assert result2.final_output == "Second run response"
+        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
+
+    @pytest.mark.asyncio
+    async def test_required_with_stop_at_tool_name(self):
+        """
+        Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
+        Ensure it correctly stops at the specified tool
+        """
+        # Set up fake model to return a tool call for second_tool
+        fake_model = FakeModel()
+        fake_model.set_next_output([
+            get_function_tool_call("second_tool", "{}")
+        ])
+
+        # Create agent with two tools and tool_choice="required" and stop_at_tool behavior
+        first_tool = get_function_tool("first_tool", return_value="first tool result")
+        second_tool = get_function_tool("second_tool", return_value="second tool result")
+
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[first_tool, second_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+            tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
+        )
+
+        # Run should stop after using second_tool
+        result = await Runner.run(agent, "run test")
+        assert result.final_output == "second tool result"
+
+    @pytest.mark.asyncio
+    async def test_specific_tool_choice(self):
+        """
+        Test scenario 3: When using a specific tool choice name
+        Ensure it doesn't cause infinite loops
+        """
+        # Set up fake model to return a text message
+        fake_model = FakeModel()
+        fake_model.set_next_output([get_text_message("Test message")])
+
+        # Create agent with specific tool_choice
+        tool1 = get_function_tool("tool1")
+        tool2 = get_function_tool("tool2")
+        tool3 = get_function_tool("tool3")
+
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[tool1, tool2, tool3],
+            model_settings=ModelSettings(tool_choice="tool1"),  # Specific tool
+        )
+
+        # Run should complete without infinite loops
+        result = await Runner.run(agent, "first run")
+        assert result.final_output == "Test message"
+
+    @pytest.mark.asyncio
+    async def test_required_with_single_tool(self):
+        """
+        Test scenario 4: When using required tool_choice with only one tool
+        Ensure it doesn't cause infinite loops
+        """
+        # Set up fake model to return a tool call followed by a text message
+        fake_model = FakeModel()
+        fake_model.add_multiple_turn_outputs([
+            # First call returns a tool call
+            [get_function_tool_call("custom_tool", "{}")],
+            # Second call returns a text message
+            [get_text_message("Final response")]
+        ])
+
+        # Create agent with a single tool and tool_choice="required"
+        custom_tool = get_function_tool("custom_tool", return_value="tool result")
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[custom_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+        )
+
+        # Run should complete without infinite loops
+        result = await Runner.run(agent, "first run")
+        assert result.final_output == "Final response"
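
A minimal usage sketch of the behaviour the updated `docs/agents.md` note describes, assuming the `Agent`, `ModelSettings`, and `Runner` exports exercised by the new tests plus the `function_tool` decorator from the `agents` package (not shown in this patch); the `get_weather` tool, its return value, and the prompt are illustrative assumptions only, and running it requires a configured model/API key.

```python
import asyncio

from agents import Agent, ModelSettings, Runner, function_tool


@function_tool
def get_weather(city: str) -> str:
    """Illustrative tool: return a canned weather report for the given city."""
    return f"It is always sunny in {city}."


agent = Agent(
    name="weather_agent",
    instructions="Answer weather questions using the available tool.",
    tools=[get_weather],
    # "required" with a single tool: the first turn must call get_weather, then the
    # framework resets tool_choice to "auto" so the next turn can produce a final
    # text answer instead of being forced into another tool call.
    model_settings=ModelSettings(tool_choice="required"),
    # Alternative: stop right after the first tool call and use its output as the
    # final response, skipping the follow-up LLM turn entirely.
    # tool_use_behavior="stop_on_first_tool",
)


async def main() -> None:
    result = await Runner.run(agent, "What's the weather in Tokyo?")
    print(result.final_output)


if __name__ == "__main__":
    asyncio.run(main())
```

With a single tool and `tool_choice="required"`, the reset to `"auto"` is what lets the second turn return plain text rather than looping; the commented-out `tool_use_behavior="stop_on_first_tool"` line shows the documented alternative of ending the run on the tool output itself.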