
Fix potential infinite tool call loop by resetting tool_choice after … #263


Merged · 9 commits · Mar 25, 2025
9 changes: 8 additions & 1 deletion docs/agents.md
@@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f

!!! note

If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop: the LLM produces a tool call, the tool result is sent back to the LLM, and the cycle repeats because the LLM is always forced to use a tool.
To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:

1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
2. When `tool_choice` is set to "required" AND there is only one tool available

This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
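As a rough sketch of scenario 2, consider an agent with a single tool and `tool_choice="required"`. The tool, agent name, and prompt below are illustrative placeholders, and the example assumes the `function_tool` decorator exported by this SDK:

```python
import asyncio

from agents import Agent, ModelSettings, Runner, function_tool  # function_tool assumed available


@function_tool
def get_weather(city: str) -> str:
    """Illustrative tool that returns a canned answer."""
    return f"The weather in {city} is sunny."


agent = Agent(
    name="weather_agent",
    tools=[get_weather],
    # Single tool + tool_choice="required" (scenario 2 above): after the first
    # tool call the framework resets tool_choice to "auto", so the next turn
    # can return a normal text answer instead of being forced into another
    # tool call.
    model_settings=ModelSettings(tool_choice="required"),
)


async def main() -> None:
    result = await Runner.run(agent, "What's the weather in Tokyo?")
    print(result.final_output)


asyncio.run(main())
```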

If you want the Agent to completely stop after a tool call (rather than continuing with auto mode), you can set [`Agent.tool_use_behavior="stop_on_first_tool"`] which will directly use the tool output as the final response without further LLM processing.
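For contrast, a minimal sketch of the `"stop_on_first_tool"` behavior, reusing the illustrative `get_weather` tool from the sketch above:

```python
from agents import Agent, ModelSettings

agent = Agent(
    name="weather_agent",
    tools=[get_weather],  # illustrative tool defined in the previous sketch
    model_settings=ModelSettings(tool_choice="required"),
    # The first tool's output is used directly as the final response; no further
    # LLM turn runs, so no tool_choice reset is involved.
    tool_use_behavior="stop_on_first_tool",
)
```

With this configuration, `Runner.run(agent, ...)` returns the raw tool output as `result.final_output`.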
45 changes: 44 additions & 1 deletion src/agents/_run_impl.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import asyncio
import dataclasses
import inspect
from collections.abc import Awaitable
from dataclasses import dataclass
@@ -47,10 +48,11 @@
)
from .lifecycle import RunHooks
from .logger import logger
from .model_settings import ModelSettings
from .models.interface import ModelTracing
from .run_context import RunContextWrapper, TContext
from .stream_events import RunItemStreamEvent, StreamEvent
from .tool import ComputerTool, FunctionTool, FunctionToolResult
from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
from .tracing import (
SpanError,
Trace,
@@ -206,6 +208,29 @@ async def execute_tools_and_side_effects(
new_step_items.extend([result.run_item for result in function_results])
new_step_items.extend(computer_results)

# Reset tool_choice to "auto" after tool execution to prevent infinite loops
if processed_response.functions or processed_response.computer_actions:
tools = agent.tools

if (

Review comment:
If modifying the agent model settings is necessary, this should ideally only be done after checking the run config first. This is because the run config overrides agent-specific settings for a run.

Author:
Good point. These two operations should actually be parallel rather than sequential, but putting the run_config check first does make more sense. I've made this change in the latest commit.

run_config.model_settings and
cls._should_reset_tool_choice(run_config.model_settings, tools)
):
# update the run_config model settings with a copy
new_run_config_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto"
)
run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)

if cls._should_reset_tool_choice(agent.model_settings, tools):
# Create a modified copy instead of modifying the original agent
new_model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto"
)
agent = dataclasses.replace(agent, model_settings=new_model_settings)

# Second, check if there are any handoffs
if run_handoffs := processed_response.handoffs:
return await cls.execute_handoffs(
@@ -296,6 +321,24 @@ async def execute_tools_and_side_effects(
next_step=NextStepRunAgain(),
)

@classmethod
def _should_reset_tool_choice(cls, model_settings: ModelSettings, tools: list[Tool]) -> bool:
if model_settings is None or model_settings.tool_choice is None:
return False

# for specific tool choices
if (
isinstance(model_settings.tool_choice, str) and
model_settings.tool_choice not in ["auto", "required", "none"]
):
return True

# for one tool and required tool choice
if model_settings.tool_choice == "required":
return len(tools) == 1

return False

@classmethod
def process_model_response(
cls,
161 changes: 161 additions & 0 deletions tests/test_tool_choice_reset.py
@@ -0,0 +1,161 @@
import pytest

from agents import Agent, ModelSettings, Runner, Tool
from agents._run_impl import RunImpl

from .fake_model import FakeModel
from .test_responses import (
get_function_tool,
get_function_tool_call,
get_text_message,
)


class TestToolChoiceReset:

def test_should_reset_tool_choice_direct(self):
"""
Test the _should_reset_tool_choice method directly with various inputs
to ensure it correctly identifies cases where reset is needed.
"""
# Case 1: tool_choice = None should not reset
model_settings = ModelSettings(tool_choice=None)
tools1: list[Tool] = [get_function_tool("tool1")]
# tools1 is annotated as list[Tool] to satisfy type checking
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

# Case 2: tool_choice = "auto" should not reset
model_settings = ModelSettings(tool_choice="auto")
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

# Case 3: tool_choice = "none" should not reset
model_settings = ModelSettings(tool_choice="none")
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

# Case 4: tool_choice = "required" with one tool should reset
model_settings = ModelSettings(tool_choice="required")
assert RunImpl._should_reset_tool_choice(model_settings, tools1)

# Case 5: tool_choice = "required" with multiple tools should not reset
model_settings = ModelSettings(tool_choice="required")
tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]
assert not RunImpl._should_reset_tool_choice(model_settings, tools2)

# Case 6: Specific tool choice should reset
model_settings = ModelSettings(tool_choice="specific_tool")
assert RunImpl._should_reset_tool_choice(model_settings, tools1)

@pytest.mark.asyncio
async def test_required_tool_choice_with_multiple_runs(self):
"""
Test scenario 1: When multiple runs are executed with tool_choice="required"
Ensure each run works correctly and doesn't get stuck in an infinite loop
Also verify that tool_choice remains "required" between runs
"""
# Set up our fake model with responses for two runs
fake_model = FakeModel()
fake_model.add_multiple_turn_outputs([
[get_text_message("First run response")],
[get_text_message("Second run response")]
])

# Create agent with a custom tool and tool_choice="required"
custom_tool = get_function_tool("custom_tool")
agent = Agent(
name="test_agent",
model=fake_model,
tools=[custom_tool],
model_settings=ModelSettings(tool_choice="required"),
)

# First run should work correctly and preserve tool_choice
result1 = await Runner.run(agent, "first run")
assert result1.final_output == "First run response"
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

# Second run should also work correctly with tool_choice still required
result2 = await Runner.run(agent, "second run")
assert result2.final_output == "Second run response"
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

@pytest.mark.asyncio
async def test_required_with_stop_at_tool_name(self):
"""
Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
Ensure it correctly stops at the specified tool
"""
# Set up fake model to return a tool call for second_tool
fake_model = FakeModel()
fake_model.set_next_output([
get_function_tool_call("second_tool", "{}")
])

# Create agent with two tools and tool_choice="required" and stop_at_tool behavior
first_tool = get_function_tool("first_tool", return_value="first tool result")
second_tool = get_function_tool("second_tool", return_value="second tool result")

agent = Agent(
name="test_agent",
model=fake_model,
tools=[first_tool, second_tool],
model_settings=ModelSettings(tool_choice="required"),
tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
)

# Run should stop after using second_tool
result = await Runner.run(agent, "run test")
assert result.final_output == "second tool result"

@pytest.mark.asyncio
async def test_specific_tool_choice(self):
"""
Test scenario 3: When using a specific tool choice name
Ensure it doesn't cause infinite loops
"""
# Set up fake model to return a text message
fake_model = FakeModel()
fake_model.set_next_output([get_text_message("Test message")])

# Create agent with specific tool_choice
tool1 = get_function_tool("tool1")
tool2 = get_function_tool("tool2")
tool3 = get_function_tool("tool3")

agent = Agent(
name="test_agent",
model=fake_model,
tools=[tool1, tool2, tool3],
model_settings=ModelSettings(tool_choice="tool1"), # Specific tool
)

# Run should complete without infinite loops
result = await Runner.run(agent, "first run")
assert result.final_output == "Test message"

@pytest.mark.asyncio
async def test_required_with_single_tool(self):
"""
Test scenario 4: When using required tool_choice with only one tool
Ensure it doesn't cause infinite loops
"""
# Set up fake model to return a tool call followed by a text message
fake_model = FakeModel()
fake_model.add_multiple_turn_outputs([
# First call returns a tool call
[get_function_tool_call("custom_tool", "{}")],
# Second call returns a text message
[get_text_message("Final response")]
])

# Create agent with a single tool and tool_choice="required"
custom_tool = get_function_tool("custom_tool", return_value="tool result")
agent = Agent(
name="test_agent",
model=fake_model,
tools=[custom_tool],
model_settings=ModelSettings(tool_choice="required"),
)

# Run should complete without infinite loops
result = await Runner.run(agent, "first run")
assert result.final_output == "Final response"