diff --git a/.gitignore b/.gitignore
index 9da95ba3..1def8a6a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
# Byte-compiled / optimized / DLL files
__pycache__/
+**/__pycache__/
*.py[cod]
*$py.class
diff --git a/examples/research_bot/agents/__pycache__/__init__.cpython-313.pyc b/examples/research_bot/agents/__pycache__/__init__.cpython-313.pyc
deleted file mode 100644
index a094b5a5..00000000
Binary files a/examples/research_bot/agents/__pycache__/__init__.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/base_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/base_agent.cpython-313.pyc
deleted file mode 100644
index f33d4188..00000000
Binary files a/examples/research_bot/agents/__pycache__/base_agent.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/planner_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/planner_agent.cpython-313.pyc
deleted file mode 100644
index b836aacc..00000000
Binary files a/examples/research_bot/agents/__pycache__/planner_agent.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/research_manager_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/research_manager_agent.cpython-313.pyc
deleted file mode 100644
index edc3f5ff..00000000
Binary files a/examples/research_bot/agents/__pycache__/research_manager_agent.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/search_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/search_agent.cpython-313.pyc
deleted file mode 100644
index b3281242..00000000
Binary files a/examples/research_bot/agents/__pycache__/search_agent.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/summarization_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/summarization_agent.cpython-313.pyc
deleted file mode 100644
index b809d7c5..00000000
Binary files a/examples/research_bot/agents/__pycache__/summarization_agent.cpython-313.pyc and /dev/null differ
diff --git a/examples/research_bot/agents/__pycache__/writer_agent.cpython-313.pyc b/examples/research_bot/agents/__pycache__/writer_agent.cpython-313.pyc
deleted file mode 100644
index be550b1e..00000000
Binary files a/examples/research_bot/agents/__pycache__/writer_agent.cpython-313.pyc and /dev/null differ
diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py
index 8140d8c6..0c28800f 100644
--- a/src/agents/agent_output.py
+++ b/src/agents/agent_output.py
@@ -138,7 +138,7 @@ def _type_to_str(t: type[Any]) -> str:
# It's a simple type like `str`, `int`, etc.
return t.__name__
elif args:
- args_str = ', '.join(_type_to_str(arg) for arg in args)
+ args_str = ", ".join(_type_to_str(arg) for arg in args)
return f"{origin.__name__}[{args_str}]"
else:
return str(t)
diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
index 78cf9a83..d8178ae3 100644
--- a/src/agents/model_settings.py
+++ b/src/agents/model_settings.py
@@ -11,6 +11,7 @@ class ModelSettings:
This class holds optional model configuration parameters (e.g. temperature,
top_p, penalties, truncation, etc.).
"""
+
temperature: float | None = None
top_p: float | None = None
frequency_penalty: float | None = None
diff --git a/tests/LICENSE b/tests/LICENSE
deleted file mode 100644
index e5ad2c5a..00000000
--- a/tests/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2025 OpenAI
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/tests/Makefile b/tests/Makefile
deleted file mode 100644
index 7dd9bbdf..00000000
--- a/tests/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-.PHONY: sync
-sync:
- uv sync --all-extras --all-packages --group dev
-
-.PHONY: format
-format:
- uv run ruff format
-
-.PHONY: lint
-lint:
- uv run ruff check
-
-.PHONY: mypy
-mypy:
- uv run mypy .
-
-.PHONY: tests
-tests:
- uv run pytest
-
-.PHONY: old_version_tests
-old_version_tests:
- UV_PROJECT_ENVIRONMENT=.venv_39 uv run --python 3.9 -m pytest
- UV_PROJECT_ENVIRONMENT=.venv_39 uv run --python 3.9 -m mypy .
-
-.PHONY: build-docs
-build-docs:
- uv run mkdocs build
-
-.PHONY: serve-docs
-serve-docs:
- uv run mkdocs serve
-
-.PHONY: deploy-docs
-deploy-docs:
- uv run mkdocs gh-deploy --force --verbose
-
diff --git a/tests/README.md b/tests/README.md
deleted file mode 100644
index 8acd13cb..00000000
--- a/tests/README.md
+++ /dev/null
@@ -1,174 +0,0 @@
-# OpenAI Agents SDK
-
-The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows.
-
-### Core concepts:
-
-1. [**Agents**](docs/agents.md): LLMs configured with instructions, tools, guardrails, and handoffs
-2. [**Handoffs**](docs/handoffs.md): Allow agents to transfer control to other agents for specific tasks
-3. [**Guardrails**](docs/guardrails.md): Configurable safety checks for input and output validation
-4. [**Tracing**](docs/tracing.md): Built-in tracking of agent runs, allowing you to view, debug and optimize your workflows
-
-Explore the [examples](examples) directory to see the SDK in action.
-
-## Get started
-
-1. Set up your Python environment
-
-```
-python -m venv env
-source env/bin/activate
-```
-
-2. Install Agents SDK
-
-```
-pip install openai-agents
-```
-
-## Hello world example
-
-```python
-from agents import Agent, Runner
-
-agent = Agent(name="Assistant", instructions="You are a helpful assistant")
-
-result = Runner.run_sync(agent, "Write a haiku about recursion in programming.")
-print(result.final_output)
-
-# Code within the code,
-# Functions calling themselves,
-# Infinite loop's dance.
-```
-
-(_If running this, ensure you set the `OPENAI_API_KEY` environment variable_)
-
-## Handoffs example
-
-```py
-from agents import Agent, Runner
-import asyncio
-
-spanish_agent = Agent(
- name="Spanish agent",
- instructions="You only speak Spanish.",
-)
-
-english_agent = Agent(
- name="English agent",
- instructions="You only speak English",
-)
-
-triage_agent = Agent(
- name="Triage agent",
- instructions="Handoff to the appropriate agent based on the language of the request.",
- handoffs=[spanish_agent, english_agent],
-)
-
-
-async def main():
- result = await Runner.run(triage_agent, input="Hola, ¿cómo estás?")
- print(result.final_output)
- # ¡Hola! Estoy bien, gracias por preguntar. ¿Y tú, cómo estás?
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-## Functions example
-
-```python
-import asyncio
-
-from agents import Agent, Runner, function_tool
-
-
-@function_tool
-def get_weather(city: str) -> str:
- return f"The weather in {city} is sunny."
-
-
-agent = Agent(
- name="Hello world",
- instructions="You are a helpful agent.",
- tools=[get_weather],
-)
-
-
-async def main():
- result = await Runner.run(agent, input="What's the weather in Tokyo?")
- print(result.final_output)
- # The weather in Tokyo is sunny.
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-## The agent loop
-
-When you call `Runner.run()`, we run a loop until we get a final output.
-
-1. We call the LLM, using the model and settings on the agent, and the message history.
-2. The LLM returns a response, which may include tool calls.
-3. If the response has a final output (see below for more on this), we return it and end the loop.
-4. If the response has a handoff, we set the agent to the new agent and go back to step 1.
-5. We process the tool calls (if any) and append the tool response messages. Then we go to step 1.
-
-There is a `max_turns` parameter that you can use to limit the number of times the loop executes.
-
-### Final output
-
-Final output is the last thing the agent produces in the loop.
-
-1. If you set an `output_type` on the agent, the final output is produced when the LLM returns something of that type. We use [structured outputs](https://platform.openai.com/docs/guides/structured-outputs) for this.
-2. If there's no `output_type` (i.e. plain text responses), then the first LLM response without any tool calls or handoffs is considered the final output.
-
-As a result, the mental model for the agent loop is:
-
-1. If the current agent has an `output_type`, the loop runs until the agent produces structured output matching that type.
-2. If the current agent does not have an `output_type`, the loop runs until the current agent produces a message without any tool calls/handoffs.
-
-## Common agent patterns
-
-The Agents SDK is designed to be highly flexible, allowing you to model a wide range of LLM workflows including deterministic flows, iterative loops, and more. See examples in [`examples/agent_patterns`](examples/agent_patterns).
-
-## Tracing
-
-The Agents SDK includes built-in tracing, making it easy to track and debug the behavior of your agents. Tracing is extensible by design, supporting custom spans and a wide variety of external destinations, including [Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents), [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk), and [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk). See [Tracing](http://openai.github.io/openai-agents-python/tracing.md) for more details.
-
-## Development (only needed if you need to edit the SDK/examples)
-
-0. Ensure you have [`uv`](https://docs.astral.sh/uv/) installed.
-
-```bash
-uv --version
-```
-
-1. Install dependencies
-
-```bash
-make sync
-```
-
-2. (After making changes) lint/test
-
-```
-make tests # run tests
-make mypy # run typechecker
-make lint # run linter
-```
-
-## Acknowledgements
-
-We'd like to acknowledge the excellent work of the open-source community, especially:
-
-- [Pydantic](https://docs.pydantic.dev/latest/) (data validation) and [PydanticAI](https://ai.pydantic.dev/) (advanced agent framework)
-- [MkDocs](https://github.com/squidfunk/mkdocs-material)
-- [Griffe](https://github.com/mkdocstrings/griffe)
-- [uv](https://github.com/astral-sh/uv) and [ruff](https://github.com/astral-sh/ruff)
-
-We're committed to continuing to build the Agents SDK as an open source framework so others in the community can expand on our approach.
diff --git a/tests/docs/agents.md b/tests/docs/agents.md
deleted file mode 100644
index 9b6264b5..00000000
--- a/tests/docs/agents.md
+++ /dev/null
@@ -1,131 +0,0 @@
-# Agents
-
-Agents are the core building block in your apps. An agent is a large language model (LLM), configured with instructions and tools.
-
-## Basic configuration
-
-The most common properties of an agent you'll configure are:
-
-- `instructions`: also known as a developer message or system prompt.
-- `model`: which LLM to use, and optional `model_settings` to configure model tuning parameters like temperature, top_p, etc.
-- `tools`: Tools that the agent can use to achieve its tasks.
-
-```python
-from agents import Agent, ModelSettings, function_tool
-
-def get_weather(city: str) -> str:
- return f"The weather in {city} is sunny"
-
-agent = Agent(
- name="Haiku agent",
- instructions="Always respond in haiku form",
- model="o3-mini",
- tools=[function_tool(get_weather)],
-)
-```
-
-## Context
-
-Agents are generic on their `context` type. Context is a dependency-injection tool: it's an object you create and pass to `Runner.run()`, that is passed to every agent, tool, handoff etc, and it serves as a grab bag of dependencies and state for the agent run. You can provide any Python object as the context.
-
-```python
-@dataclass
-class UserContext:
- uid: str
- is_pro_user: bool
-
-    async def fetch_purchases(self) -> list[Purchase]:
- return ...
-
-agent = Agent[UserContext](
- ...,
-)
-```
-
-## Output types
-
-By default, agents produce plain text (i.e. `str`) outputs. If you want the agent to produce a particular type of output, you can use the `output_type` parameter. A common choice is to use [Pydantic](https://docs.pydantic.dev/) objects, but we support any type that can be wrapped in a Pydantic [TypeAdapter](https://docs.pydantic.dev/latest/api/type_adapter/) - dataclasses, lists, TypedDict, etc.
-
-```python
-from pydantic import BaseModel
-from agents import Agent
-
-
-class CalendarEvent(BaseModel):
- name: str
- date: str
- participants: list[str]
-
-agent = Agent(
- name="Calendar extractor",
- instructions="Extract calendar events from text",
- output_type=CalendarEvent,
-)
-```
-
-!!! note
-
- When you pass an `output_type`, that tells the model to use [structured outputs](https://platform.openai.com/docs/guides/structured-outputs) instead of regular plain text responses.
-
-## Handoffs
-
-Handoffs are sub-agents that the agent can delegate to. You provide a list of handoffs, and the agent can choose to delegate to them if relevant. This is a powerful pattern that allows orchestrating modular, specialized agents that excel at a single task. Read more in the [handoffs](handoffs.md) documentation.
-
-```python
-from agents import Agent
-
-booking_agent = Agent(...)
-refund_agent = Agent(...)
-
-triage_agent = Agent(
- name="Triage agent",
- instructions=(
- "Help the user with their questions."
- "If they ask about booking, handoff to the booking agent."
- "If they ask about refunds, handoff to the refund agent."
- ),
- handoffs=[booking_agent, refund_agent],
-)
-```
-
-## Dynamic instructions
-
-In most cases, you can provide instructions when you create the agent. However, you can also provide dynamic instructions via a function. The function will receive the agent and context, and must return the prompt. Both regular and `async` functions are accepted.
-
-```python
-def dynamic_instructions(
- context: RunContextWrapper[UserContext], agent: Agent[UserContext]
-) -> str:
- return f"The user's name is {context.context.name}. Help them with their questions."
-
-
-agent = Agent[UserContext](
- name="Triage agent",
- instructions=dynamic_instructions,
-)
-```
-
-## Lifecycle events (hooks)
-
-Sometimes, you want to observe the lifecycle of an agent. For example, you may want to log events, or pre-fetch data when certain events occur. You can hook into the agent lifecycle with the `hooks` property. Subclass the [`AgentHooks`][agents.lifecycle.AgentHooks] class, and override the methods you're interested in.
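-
-For example, a minimal sketch (assuming the `on_start` and `on_tool_start` hook methods from the `AgentHooks` interface; the logging is illustrative):
-
-```python
-from agents import Agent, AgentHooks, RunContextWrapper, Tool
-
-class LoggingAgentHooks(AgentHooks):
-    async def on_start(self, context: RunContextWrapper, agent: Agent) -> None:
-        # Called before the agent is invoked.
-        print(f"{agent.name} started")
-
-    async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None:
-        # Called before each tool invocation.
-        print(f"{agent.name} is about to call {tool.name}")
-
-agent = Agent(
-    name="Assistant",
-    instructions="You are a helpful assistant",
-    hooks=LoggingAgentHooks(),
-)
-```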
-
-## Guardrails
-
-Guardrails allow you to run checks/validations on user input, in parallel to the agent running. For example, you could screen the user's input for relevance. Read more in the [guardrails](guardrails.md) documentation.
-
-## Cloning/copying agents
-
-By using the `clone()` method on an agent, you can duplicate an Agent, and optionally change any properties you like.
-
-```python
-pirate_agent = Agent(
- name="Pirate",
- instructions="Write like a pirate",
- model="o3-mini",
-)
-
-robot_agent = pirate_agent.clone(
- name="Robot",
- instructions="Write like a robot",
-)
-```
diff --git a/tests/docs/assets/images/favicon-platform.svg b/tests/docs/assets/images/favicon-platform.svg
deleted file mode 100644
index 91ef0aea..00000000
--- a/tests/docs/assets/images/favicon-platform.svg
+++ /dev/null
@@ -1,16 +0,0 @@
-
diff --git a/tests/docs/assets/images/orchestration.png b/tests/docs/assets/images/orchestration.png
deleted file mode 100644
index 621a833b..00000000
Binary files a/tests/docs/assets/images/orchestration.png and /dev/null differ
diff --git a/tests/docs/assets/logo.svg b/tests/docs/assets/logo.svg
deleted file mode 100644
index ba36fc2a..00000000
--- a/tests/docs/assets/logo.svg
+++ /dev/null
@@ -1,15 +0,0 @@
-
diff --git a/tests/docs/config.md b/tests/docs/config.md
deleted file mode 100644
index 198d7b7e..00000000
--- a/tests/docs/config.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# Configuring the SDK
-
-## API keys and clients
-
-By default, the SDK looks for the `OPENAI_API_KEY` environment variable for LLM requests and tracing, as soon as it is imported. If you are unable to set that environment variable before your app starts, you can use the [set_default_openai_key()][agents.set_default_openai_key] function to set the key.
-
-```python
-from agents import set_default_openai_key
-
-set_default_openai_key("sk-...")
-```
-
-Alternatively, you can also configure an OpenAI client to be used. By default, the SDK creates an `AsyncOpenAI` instance, using the API key from the environment variable or the default key set above. You can change this by using the [set_default_openai_client()][agents.set_default_openai_client] function.
-
-```python
-from openai import AsyncOpenAI
-from agents import set_default_openai_client
-
-custom_client = AsyncOpenAI(base_url="...", api_key="...")
-set_default_openai_client(custom_client)
-```
-
-Finally, you can also customize the OpenAI API that is used. By default, we use the OpenAI Responses API. You can override this to use the Chat Completions API by using the [set_default_openai_api()][agents.set_default_openai_api] function.
-
-```python
-from agents import set_default_openai_api
-
-set_default_openai_api("chat_completions")
-```
-
-## Tracing
-
-Tracing is enabled by default. It uses the OpenAI API keys from the section above by default (i.e. the environment variable or the default key you set). You can specifically set the API key used for tracing by using the [`set_tracing_export_api_key`][agents.set_tracing_export_api_key] function.
-
-```python
-from agents import set_tracing_export_api_key
-
-set_tracing_export_api_key("sk-...")
-```
-
-You can also disable tracing entirely by using the [`set_tracing_disabled()`][agents.set_tracing_disabled] function.
-
-```python
-from agents import set_tracing_disabled
-
-set_tracing_disabled(True)
-```
-
-## Debug logging
-
-The SDK has two Python loggers without any handlers set. By default, this means that warnings and errors are sent to `stdout`, but other logs are suppressed.
-
-To enable verbose logging, use the [`enable_verbose_stdout_logging()`][agents.enable_verbose_stdout_logging] function.
-
-```python
-from agents import enable_verbose_stdout_logging
-
-enable_verbose_stdout_logging()
-```
-
-Alternatively, you can customize the logs by adding handlers, filters, formatters, etc. You can read more in the [Python logging guide](https://docs.python.org/3/howto/logging.html).
-
-```python
-import logging
-
-logger = logging.getLogger("openai.agents") # or openai.agents.tracing for the Tracing logger
-
-# To make all logs show up
-logger.setLevel(logging.DEBUG)
-# To make info and above show up
-logger.setLevel(logging.INFO)
-# To make warning and above show up
-logger.setLevel(logging.WARNING)
-# etc
-
-# You can customize this as needed, but this will output to `stderr` by default
-logger.addHandler(logging.StreamHandler())
-```
-
-### Sensitive data in logs
-
-Certain logs may contain sensitive data (for example, user data). If you don't want this data to be logged, set the following environment variables.
-
-To disable logging LLM inputs and outputs:
-
-```bash
-export OPENAI_AGENTS_DONT_LOG_MODEL_DATA=1
-```
-
-To disable logging tool inputs and outputs:
-
-```bash
-export OPENAI_AGENTS_DONT_LOG_TOOL_DATA=1
-```
diff --git a/tests/docs/context.md b/tests/docs/context.md
deleted file mode 100644
index 5dcacebe..00000000
--- a/tests/docs/context.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# Context management
-
-Context is an overloaded term. There are two main classes of context you might care about:
-
-1. Context available locally to your code: this is data and dependencies you might need when tool functions run, during callbacks like `on_handoff`, in lifecycle hooks, etc.
-2. Context available to LLMs: this is data the LLM sees when generating a response.
-
-## Local context
-
-This is represented via the [`RunContextWrapper`][agents.run_context.RunContextWrapper] class and the [`context`][agents.run_context.RunContextWrapper.context] property within it. The way this works is:
-
-1. You create any Python object you want. A common pattern is to use a dataclass or a Pydantic object.
-2. You pass that object to the various run methods (e.g. `Runner.run(..., context=whatever)`).
-3. All your tool calls, lifecycle hooks etc will be passed a wrapper object, `RunContextWrapper[T]`, where `T` represents your context object type, which you can access via `wrapper.context`.
-
-The **most important** thing to be aware of: every agent, tool function, lifecycle hook etc for a given agent run must use the same _type_ of context.
-
-You can use the context for things like:
-
-- Contextual data for your run (e.g. things like a username/uid or other information about the user)
-- Dependencies (e.g. logger objects, data fetchers, etc)
-- Helper functions
-
-!!! danger "Note"
-
-    The context object is **not** sent to the LLM. It is purely a local object that you can read from, write to, and call methods on.
-
-```python
-import asyncio
-from dataclasses import dataclass
-
-from agents import Agent, RunContextWrapper, Runner, function_tool
-
-@dataclass
-class UserInfo: # (1)!
- name: str
- uid: int
-
-async def fetch_user_age(wrapper: RunContextWrapper[UserInfo]) -> str: # (2)!
- return f"User {wrapper.context.name} is 47 years old"
-
-async def main():
-    user_info = UserInfo(name="John", uid=123)
-
-    agent = Agent[UserInfo](  # (3)!
-        name="Assistant",
-        tools=[function_tool(fetch_user_age)],
-    )
-
-    result = await Runner.run(  # (4)!
- starting_agent=agent,
- input="What is the age of the user?",
- context=user_info,
- )
-
- print(result.final_output) # (5)!
- # The user John is 47 years old.
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-1. This is the context object. We've used a dataclass here, but you can use any type.
-2. This is a tool. You can see it takes a `RunContextWrapper[UserInfo]`. The tool implementation reads from the context.
-3. We mark the agent with the generic `UserInfo`, so that the typechecker can catch errors (for example, if we tried to pass a tool that took a different context type).
-4. The context is passed to the `run` function.
-5. The agent correctly calls the tool and gets the age.
-
-## Agent/LLM context
-
-When an LLM is called, the **only** data it can see is from the conversation history. This means that if you want to make some new data available to the LLM, you must do it in a way that makes it available in that history. There are a few ways to do this:
-
-1. You can add it to the Agent `instructions`. This is also known as a "system prompt" or "developer message". System prompts can be static strings, or they can be dynamic functions that receive the context and output a string. This is a common tactic for information that is always useful (for example, the user's name or the current date). See the sketch after this list.
-2. Add it to the `input` when calling the `Runner.run` functions. This is similar to the `instructions` tactic, but allows you to have messages that are lower in the [chain of command](https://cdn.openai.com/spec/model-spec-2024-05-08.html#follow-the-chain-of-command).
-3. Expose it via function tools. This is useful for _on-demand_ context - the LLM decides when it needs some data, and can call the tool to fetch that data.
-4. Use retrieval or web search. These are special tools that are able to fetch relevant data from files or databases (retrieval), or from the web (web search). This is useful for "grounding" the response in relevant contextual data.
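-
-As a sketch of option 1, a dynamic system prompt might look like this (the `UserInfo` dataclass and the prompt text are illustrative):
-
-```python
-from dataclasses import dataclass
-
-from agents import Agent, RunContextWrapper
-
-@dataclass
-class UserInfo:
-    name: str
-
-def dynamic_instructions(ctx: RunContextWrapper[UserInfo], agent: Agent[UserInfo]) -> str:
-    # Always-useful context (the user's name) goes straight into the system prompt.
-    return f"You are a helpful assistant. The user's name is {ctx.context.name}."
-
-agent = Agent[UserInfo](name="Assistant", instructions=dynamic_instructions)
-```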
diff --git a/tests/docs/guardrails.md b/tests/docs/guardrails.md
deleted file mode 100644
index 2b7369c3..00000000
--- a/tests/docs/guardrails.md
+++ /dev/null
@@ -1,154 +0,0 @@
-# Guardrails
-
-Guardrails run _in parallel_ to your agents, enabling you to do checks and validations of user input. For example, imagine you have an agent that uses a very smart (and hence slow/expensive) model to help with customer requests. You wouldn't want malicious users to ask the model to help them with their math homework. So, you can run a guardrail with a fast/cheap model. If the guardrail detects malicious usage, it can immediately raise an error, which stops the expensive model from running and saves you time/money.
-
-There are two kinds of guardrails:
-
-1. Input guardrails run on the initial user input
-2. Output guardrails run on the final agent output
-
-## Input guardrails
-
-Input guardrails run in 3 steps:
-
-1. First, the guardrail receives the same input passed to the agent.
-2. Next, the guardrail function runs to produce a [`GuardrailFunctionOutput`][agents.guardrail.GuardrailFunctionOutput], which is then wrapped in an [`InputGuardrailResult`][agents.guardrail.InputGuardrailResult]
-3. Finally, we check if [`.tripwire_triggered`][agents.guardrail.GuardrailFunctionOutput.tripwire_triggered] is true. If true, an [`InputGuardrailTripwireTriggered`][agents.exceptions.InputGuardrailTripwireTriggered] exception is raised, so you can appropriately respond to the user or handle the exception.
-
-!!! note
-
- Input guardrails are intended to run on user input, so an agent's guardrails only run if the agent is the *first* agent. You might wonder, why is the `guardrails` property on the agent instead of passed to `Runner.run`? It's because guardrails tend to be related to the actual Agent - you'd run different guardrails for different agents, so colocating the code is useful for readability.
-
-## Output guardrails
-
-Output guardrails run in 3 steps:
-
-1. First, the guardrail receives the output produced by the agent.
-2. Next, the guardrail function runs to produce a [`GuardrailFunctionOutput`][agents.guardrail.GuardrailFunctionOutput], which is then wrapped in an [`OutputGuardrailResult`][agents.guardrail.OutputGuardrailResult]
-3. Finally, we check if [`.tripwire_triggered`][agents.guardrail.GuardrailFunctionOutput.tripwire_triggered] is true. If true, an [`OutputGuardrailTripwireTriggered`][agents.exceptions.OutputGuardrailTripwireTriggered] exception is raised, so you can appropriately respond to the user or handle the exception.
-
-!!! note
-
-    Output guardrails are intended to run on the final agent output, so an agent's guardrails only run if the agent is the *last* agent. Similar to the input guardrails, we do this because guardrails tend to be related to the actual Agent - you'd run different guardrails for different agents, so colocating the code is useful for readability.
-
-## Tripwires
-
-If the input or output fails the guardrail, the guardrail can signal this with a tripwire. As soon as we see a guardrail whose tripwire has been triggered, we immediately raise an `{Input,Output}GuardrailTripwireTriggered` exception and halt agent execution.
-
-## Implementing a guardrail
-
-You need to provide a function that receives input, and returns a [`GuardrailFunctionOutput`][agents.guardrail.GuardrailFunctionOutput]. In this example, we'll do this by running an Agent under the hood.
-
-```python
-from pydantic import BaseModel
-from agents import (
- Agent,
- GuardrailFunctionOutput,
- InputGuardrailTripwireTriggered,
- RunContextWrapper,
- Runner,
- TResponseInputItem,
- input_guardrail,
-)
-
-class MathHomeworkOutput(BaseModel):
- is_math_homework: bool
- reasoning: str
-
-guardrail_agent = Agent( # (1)!
- name="Guardrail check",
- instructions="Check if the user is asking you to do their math homework.",
- output_type=MathHomeworkOutput,
-)
-
-
-@input_guardrail
-async def math_guardrail( # (2)!
- ctx: RunContextWrapper[None], agent: Agent, input: str | list[TResponseInputItem]
-) -> GuardrailFunctionOutput:
- result = await Runner.run(guardrail_agent, input, context=ctx.context)
-
- return GuardrailFunctionOutput(
- output_info=result.final_output, # (3)!
- tripwire_triggered=result.final_output.is_math_homework,
- )
-
-
-agent = Agent( # (4)!
- name="Customer support agent",
- instructions="You are a customer support agent. You help customers with their questions.",
- input_guardrails=[math_guardrail],
-)
-
-async def main():
- # This should trip the guardrail
- try:
- await Runner.run(agent, "Hello, can you help me solve for x: 2x + 3 = 11?")
- print("Guardrail didn't trip - this is unexpected")
-
- except InputGuardrailTripwireTriggered:
- print("Math homework guardrail tripped")
-```
-
-1. We'll use this agent in our guardrail function.
-2. This is the guardrail function that receives the agent's input/context, and returns the result.
-3. We can include extra information in the guardrail result.
-4. This is the actual agent that defines the workflow.
-
-Output guardrails are similar.
-
-```python
-from pydantic import BaseModel
-from agents import (
- Agent,
- GuardrailFunctionOutput,
- OutputGuardrailTripwireTriggered,
- RunContextWrapper,
- Runner,
- output_guardrail,
-)
-class MessageOutput(BaseModel): # (1)!
- response: str
-
-class MathOutput(BaseModel): # (2)!
- is_math: bool
- reasoning: str
-
-guardrail_agent = Agent(
- name="Guardrail check",
- instructions="Check if the output includes any math.",
- output_type=MathOutput,
-)
-
-@output_guardrail
-async def math_guardrail( # (3)!
- ctx: RunContextWrapper, agent: Agent, output: MessageOutput
-) -> GuardrailFunctionOutput:
- result = await Runner.run(guardrail_agent, output.response, context=ctx.context)
-
- return GuardrailFunctionOutput(
- output_info=result.final_output,
- tripwire_triggered=result.final_output.is_math,
- )
-
-agent = Agent( # (4)!
- name="Customer support agent",
- instructions="You are a customer support agent. You help customers with their questions.",
- output_guardrails=[math_guardrail],
- output_type=MessageOutput,
-)
-
-async def main():
- # This should trip the guardrail
- try:
- await Runner.run(agent, "Hello, can you help me solve for x: 2x + 3 = 11?")
- print("Guardrail didn't trip - this is unexpected")
-
- except OutputGuardrailTripwireTriggered:
- print("Math output guardrail tripped")
-```
-
-1. This is the actual agent's output type.
-2. This is the guardrail's output type.
-3. This is the guardrail function that receives the agent's output, and returns the result.
-4. This is the actual agent that defines the workflow.
diff --git a/tests/docs/handoffs.md b/tests/docs/handoffs.md
deleted file mode 100644
index 0b868c4a..00000000
--- a/tests/docs/handoffs.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Handoffs
-
-Handoffs allow an agent to delegate tasks to another agent. This is particularly useful in scenarios where different agents specialize in distinct areas. For example, a customer support app might have agents that each specifically handle tasks like order status, refunds, FAQs, etc.
-
-Handoffs are represented as tools to the LLM. So if there's a handoff to an agent named `Refund Agent`, the tool would be called `transfer_to_refund_agent`.
-
-## Creating a handoff
-
-All agents have a [`handoffs`][agents.agent.Agent.handoffs] param, which can either take an `Agent` directly, or a `Handoff` object that customizes the Handoff.
-
-You can create a handoff using the [`handoff()`][agents.handoffs.handoff] function provided by the Agents SDK. This function allows you to specify the agent to hand off to, along with optional overrides and input filters.
-
-### Basic Usage
-
-Here's how you can create a simple handoff:
-
-```python
-from agents import Agent, handoff
-
-billing_agent = Agent(name="Billing agent")
-refund_agent = Agent(name="Refund agent")
-
-# (1)!
-triage_agent = Agent(name="Triage agent", handoffs=[billing_agent, handoff(refund_agent)])
-```
-
-1. You can use the agent directly (as in `billing_agent`), or you can use the `handoff()` function.
-
-### Customizing handoffs via the `handoff()` function
-
-The [`handoff()`][agents.handoffs.handoff] function lets you customize things.
-
-- `agent`: This is the agent to which things will be handed off.
-- `tool_name_override`: By default, the `Handoff.default_tool_name()` function is used, which resolves to `transfer_to_<agent_name>`. You can override this.
-- `tool_description_override`: Override the default tool description from `Handoff.default_tool_description()`
-- `on_handoff`: A callback function executed when the handoff is invoked. This is useful for things like kicking off some data fetching as soon as you know a handoff is being invoked. This function receives the agent context, and can optionally also receive LLM generated input. The input data is controlled by the `input_type` param.
-- `input_type`: The type of input expected by the handoff (optional).
-- `input_filter`: This lets you filter the input received by the next agent. See below for more.
-
-```python
-from agents import Agent, handoff, RunContextWrapper
-
-def on_handoff(ctx: RunContextWrapper[None]):
- print("Handoff called")
-
-agent = Agent(name="My agent")
-
-handoff_obj = handoff(
- agent=agent,
- on_handoff=on_handoff,
- tool_name_override="custom_handoff_tool",
- tool_description_override="Custom description",
-)
-```
-
-## Handoff inputs
-
-In certain situations, you want the LLM to provide some data when it calls a handoff. For example, imagine a handoff to an "Escalation agent". You might want a reason to be provided, so you can log it.
-
-```python
-from pydantic import BaseModel
-
-from agents import Agent, handoff, RunContextWrapper
-
-class EscalationData(BaseModel):
- reason: str
-
-async def on_handoff(ctx: RunContextWrapper[None], input_data: EscalationData):
- print(f"Escalation agent called with reason: {input_data.reason}")
-
-agent = Agent(name="Escalation agent")
-
-handoff_obj = handoff(
- agent=agent,
- on_handoff=on_handoff,
- input_type=EscalationData,
-)
-```
-
-## Input filters
-
-When a handoff occurs, it's as though the new agent takes over the conversation, and gets to see the entire previous conversation history. If you want to change this, you can set an [`input_filter`][agents.handoffs.Handoff.input_filter]. An input filter is a function that receives the existing input via a [`HandoffInputData`][agents.handoffs.HandoffInputData], and must return a new `HandoffInputData`.
-
-There are some common patterns (for example, removing all tool calls from the history), which are implemented for you in [`agents.extensions.handoff_filters`][].
-
-```python
-from agents import Agent, handoff
-from agents.extensions import handoff_filters
-
-agent = Agent(name="FAQ agent")
-
-handoff_obj = handoff(
- agent=agent,
- input_filter=handoff_filters.remove_all_tools, # (1)!
-)
-```
-
-1. This will automatically remove all tools from the history when `FAQ agent` is called.
-
-## Recommended prompts
-
-To make sure that LLMs understand handoffs properly, we recommend including information about handoffs in your agents. We have a suggested prefix in [`agents.extensions.handoff_prompt.RECOMMENDED_PROMPT_PREFIX`][], or you can call [`agents.extensions.handoff_prompt.prompt_with_handoff_instructions`][] to automatically add recommended data to your prompts.
-
-```python
-from agents import Agent
-from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX
-
-billing_agent = Agent(
- name="Billing agent",
- instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
- .""",
-)
-```
diff --git a/tests/docs/index.md b/tests/docs/index.md
deleted file mode 100644
index 28c68708..00000000
--- a/tests/docs/index.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# OpenAI Agents SDK
-
-The OpenAI Agents SDK enables you to build agentic AI apps in a lightweight, easy to use package with very few abstractions. It's a production-ready upgrade of our previous experimentation for agents, [Swarm](https://github.com/openai/swarm/tree/main). The Agents SDK has a very small set of primitives:
-
-- **Agents**, which are LLMs equipped with instructions and tools
-- **Handoffs**, which allow agents to delegate to other agents for specific tasks
-- **Guardrails**, which enable the inputs to agents to be validated
-
-In combination with Python, these primitives are powerful enough to express complex relationships between tools and agents, and allow you to build real world applications without a steep learning curve. In addition, the SDK comes with built-in **tracing** that lets you visualize and debug your agentic flows, as well as evaluate them and even fine-tune models for your application.
-
-## Why use the Agents SDK
-
-The SDK has two driving design principles:
-
-1. Enough features to be worth using, but few enough primitives to make it quick to learn.
-2. Works great out of the box, but you can customize exactly what happens.
-
-Here are the main features of the SDK:
-
-- Agent loop: Built-in agent loop that handles calling tools, sending results to the LLM, and looping until the LLM is done.
-- Python-first: Use built-in language features to orchestrate and chain agents, rather than needing to learn new abstractions.
-- Handoffs: A powerful feature to coordinate and delegate between multiple agents.
-- Guardrails: Run input validations and checks in parallel to your agents, breaking early if the checks fail.
-- Function tools: Turn any Python function into a tool, with automatic schema generation and Pydantic-powered validation.
-- Tracing: Built-in tracing that lets you visualize, debug and monitor your workflows, as well as use the OpenAI suite of evaluation, fine-tuning and distillation tools.
-
-## Installation
-
-```bash
-pip install openai-agents
-```
-
-## Hello world example
-
-```python
-from agents import Agent, Runner
-
-agent = Agent(name="Assistant", instructions="You are a helpful assistant")
-
-result = Runner.run_sync(agent, "Write a haiku about recursion in programming.")
-print(result.final_output)
-
-# Code within the code,
-# Functions calling themselves,
-# Infinite loop's dance.
-```
-
-(_If running this, ensure you set the `OPENAI_API_KEY` environment variable_)
-
-```bash
-export OPENAI_API_KEY=sk-...
-```
diff --git a/tests/docs/models.md b/tests/docs/models.md
deleted file mode 100644
index 7d2ff1ff..00000000
--- a/tests/docs/models.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Models
-
-The Agents SDK comes with out of the box support for OpenAI models in two flavors:
-
-- **Recommended**: the [`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel], which calls OpenAI APIs using the new [Responses API](https://platform.openai.com/docs/api-reference/responses).
-- The [`OpenAIChatCompletionsModel`][agents.models.openai_chatcompletions.OpenAIChatCompletionsModel], which calls OpenAI APIs using the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat).
-
-## Mixing and matching models
-
-Within a single workflow, you may want to use different models for each agent. For example, you could use a smaller, faster model for triage, while using a larger, more capable model for complex tasks. When configuring an [`Agent`][agents.Agent], you can select a specific model by either:
-
-1. Passing the name of an OpenAI model.
-2. Passing any model name + a [`ModelProvider`][agents.models.interface.ModelProvider] that can map that name to a Model instance.
-3. Directly providing a [`Model`][agents.models.interface.Model] implementation.
-
-!!! note
-
-    While our SDK supports both the [`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel] and the [`OpenAIChatCompletionsModel`][agents.models.openai_chatcompletions.OpenAIChatCompletionsModel] shapes, we recommend using a single model shape for each workflow because the two shapes support a different set of features and tools. If your workflow requires mixing and matching model shapes, make sure that all the features you're using are available on both.
-
-```python
-from agents import Agent, Runner, AsyncOpenAI, OpenAIChatCompletionsModel
-import asyncio
-
-spanish_agent = Agent(
- name="Spanish agent",
- instructions="You only speak Spanish.",
- model="o3-mini", # (1)!
-)
-
-english_agent = Agent(
- name="English agent",
- instructions="You only speak English",
- model=OpenAIChatCompletionsModel( # (2)!
- model="gpt-4o",
- openai_client=AsyncOpenAI()
- ),
-)
-
-triage_agent = Agent(
- name="Triage agent",
- instructions="Handoff to the appropriate agent based on the language of the request.",
- handoffs=[spanish_agent, english_agent],
- model="gpt-3.5-turbo",
-)
-
-async def main():
- result = await Runner.run(triage_agent, input="Hola, ¿cómo estás?")
- print(result.final_output)
-```
-
-1. Sets the name of an OpenAI model directly.
-2. Provides a [`Model`][agents.models.interface.Model] implementation.
-
-## Using other LLM providers
-
-Many providers also support the OpenAI API format, which means you can pass a `base_url` to the existing OpenAI model implementations and use them easily. `ModelSettings` is used to configure tuning parameters (e.g., temperature, top_p) for the model you select.
-
-```python
-from agents import Agent, AsyncOpenAI, ModelSettings, OpenAIChatCompletionsModel
-
-external_client = AsyncOpenAI(
- api_key="EXTERNAL_API_KEY",
- base_url="https://api.external.com/v1/",
-)
-
-spanish_agent = Agent(
- name="Spanish agent",
- instructions="You only speak Spanish.",
- model=OpenAIChatCompletionsModel(
- model="EXTERNAL_MODEL_NAME",
- openai_client=external_client,
- ),
- model_settings=ModelSettings(temperature=0.5),
-)
-```
diff --git a/tests/docs/multi_agent.md b/tests/docs/multi_agent.md
deleted file mode 100644
index c1182492..00000000
--- a/tests/docs/multi_agent.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Orchestrating multiple agents
-
-Orchestration refers to the flow of agents in your app. Which agents run, in what order, and how do they decide what happens next? There are two main ways to orchestrate agents:
-
-1. Allowing the LLM to make decisions: this uses the intelligence of an LLM to plan, reason, and decide on what steps to take based on that.
-2. Orchestrating via code: determining the flow of agents via your code.
-
-You can mix and match these patterns. Each has its own tradeoffs, described below.
-
-## Orchestrating via LLM
-
-An agent is an LLM equipped with instructions, tools and handoffs. This means that given an open-ended task, the LLM can autonomously plan how it will tackle the task, using tools to take actions and acquire data, and using handoffs to delegate tasks to sub-agents. For example, a research agent could be equipped with tools like:
-
-- Web search to find information online
-- File search and retrieval to search through proprietary data and connections
-- Computer use to take actions on a computer
-- Code execution to do data analysis
-- Handoffs to specialized agents that are great at planning, report writing and more.
-
-This pattern is great when the task is open-ended and you want to rely on the intelligence of an LLM. The most important tactics here are:
-
-1. Invest in good prompts. Make it clear what tools are available, how to use them, and what parameters it must operate within.
-2. Monitor your app and iterate on it. See where things go wrong, and iterate on your prompts.
-3. Allow the agent to introspect and improve. For example, run it in a loop, and let it critique itself; or, provide error messages and let it improve.
-4. Have specialized agents that excel in one task, rather than having a general purpose agent that is expected to be good at anything.
-5. Invest in [evals](https://platform.openai.com/docs/guides/evals). This lets you train your agents to improve and get better at tasks.
-
-## Orchestrating via code
-
-While orchestrating via LLM is powerful, orchestrating via code makes tasks more deterministic and predictable in terms of speed, cost and performance. Common patterns here are:
-
-- Using [structured outputs](https://platform.openai.com/docs/guides/structured-outputs) to generate well formed data that you can inspect with your code. For example, you might ask an agent to classify the task into a few categories, and then pick the next agent based on the category.
-- Chaining multiple agents by transforming the output of one into the input of the next. You can decompose a task like writing a blog post into a series of steps - do research, write an outline, write the blog post, critique it, and then improve it.
-- Running the agent that performs the task in a `while` loop with an agent that evaluates and provides feedback, until the evaluator says the output passes certain criteria.
-- Running multiple agents in parallel, e.g. via Python primitives like `asyncio.gather` (see the sketch below). This is useful for speed when you have multiple tasks that don't depend on each other.
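-
-A minimal sketch of the parallel pattern (the agents and inputs here are illustrative):
-
-```python
-import asyncio
-
-from agents import Agent, Runner
-
-spanish_agent = Agent(name="Spanish agent", instructions="Translate the user's message to Spanish.")
-french_agent = Agent(name="French agent", instructions="Translate the user's message to French.")
-
-async def main():
-    # The two translations don't depend on each other, so run them concurrently.
-    spanish, french = await asyncio.gather(
-        Runner.run(spanish_agent, input="Hello, how are you?"),
-        Runner.run(french_agent, input="Hello, how are you?"),
-    )
-    print(spanish.final_output)
-    print(french.final_output)
-
-if __name__ == "__main__":
-    asyncio.run(main())
-```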
-
-We have a number of examples in [`examples/agent_patterns`](https://github.com/openai/openai-agents-python/tree/main/examples/agent_patterns).
diff --git a/tests/docs/quickstart.md b/tests/docs/quickstart.md
deleted file mode 100644
index 19051f49..00000000
--- a/tests/docs/quickstart.md
+++ /dev/null
@@ -1,186 +0,0 @@
-# Quickstart
-
-## Create a project and virtual environment
-
-You'll only need to do this once.
-
-```bash
-mkdir my_project
-cd my_project
-python -m venv .venv
-```
-
-### Activate the virtual environment
-
-Do this every time you start a new terminal session.
-
-```bash
-source .venv/bin/activate
-```
-
-### Install the Agents SDK
-
-```bash
-pip install openai-agents # or `uv add openai-agents`, etc
-```
-
-### Set an OpenAI API key
-
-If you don't have one, follow [these instructions](https://platform.openai.com/docs/quickstart#create-and-export-an-api-key) to create an OpenAI API key.
-
-```bash
-export OPENAI_API_KEY=sk-...
-```
-
-## Create your first agent
-
-Agents are defined with instructions, a name, and optional config (such as `model_config`).
-
-```python
-from agents import Agent
-
-agent = Agent(
- name="Math Tutor",
- instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
-)
-```
-
-## Add a few more agents
-
-Additional agents can be defined in the same way. `handoff_descriptions` provide additional context for determining handoff routing.
-
-```python
-from agents import Agent
-
-history_tutor_agent = Agent(
- name="History Tutor",
- handoff_description="Specialist agent for historical questions",
- instructions="You provide assistance with historical queries. Explain important events and context clearly.",
-)
-
-math_tutor_agent = Agent(
- name="Math Tutor",
- handoff_description="Specialist agent for math questions",
- instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
-)
-```
-
-## Define your handoffs
-
-On each agent, you can define an inventory of outgoing handoff options that the agent can choose from to decide how to make progress on its task.
-
-```python
-triage_agent = Agent(
- name="Triage Agent",
- instructions="You determine which agent to use based on the user's homework question",
- handoffs=[history_tutor_agent, math_tutor_agent]
-)
-```
-
-## Run the agent orchestration
-
-Let's check that the workflow runs and the triage agent correctly routes between the two specialist agents.
-
-```python
-from agents import Runner
-
-async def main():
- result = await Runner.run(triage_agent, "What is the capital of France?")
- print(result.final_output)
-```
-
-## Add a guardrail
-
-You can define custom guardrails to run on the input or output.
-
-```python
-from agents import GuardrailFunctionOutput, Agent, Runner
-from pydantic import BaseModel
-
-class HomeworkOutput(BaseModel):
- is_homework: bool
- reasoning: str
-
-guardrail_agent = Agent(
- name="Guardrail check",
- instructions="Check if the user is asking about homework.",
- output_type=HomeworkOutput,
-)
-
-async def homework_guardrail(ctx, agent, input_data):
- result = await Runner.run(guardrail_agent, input_data, context=ctx.context)
- final_output = result.final_output_as(HomeworkOutput)
- return GuardrailFunctionOutput(
- output_info=final_output,
- tripwire_triggered=not final_output.is_homework,
- )
-```
-
-## Put it all together
-
-Let's put it all together and run the entire workflow, using handoffs and the input guardrail.
-
-```python
-from agents import Agent, InputGuardrail, GuardrailFunctionOutput, Runner
-from pydantic import BaseModel
-import asyncio
-
-class HomeworkOutput(BaseModel):
- is_homework: bool
- reasoning: str
-
-guardrail_agent = Agent(
- name="Guardrail check",
- instructions="Check if the user is asking about homework.",
- output_type=HomeworkOutput,
-)
-
-math_tutor_agent = Agent(
- name="Math Tutor",
- handoff_description="Specialist agent for math questions",
- instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
-)
-
-history_tutor_agent = Agent(
- name="History Tutor",
- handoff_description="Specialist agent for historical questions",
- instructions="You provide assistance with historical queries. Explain important events and context clearly.",
-)
-
-
-async def homework_guardrail(ctx, agent, input_data):
- result = await Runner.run(guardrail_agent, input_data, context=ctx.context)
- final_output = result.final_output_as(HomeworkOutput)
- return GuardrailFunctionOutput(
- output_info=final_output,
- tripwire_triggered=not final_output.is_homework,
- )
-
-triage_agent = Agent(
- name="Triage Agent",
- instructions="You determine which agent to use based on the user's homework question",
- handoffs=[history_tutor_agent, math_tutor_agent],
- input_guardrails=[
- InputGuardrail(guardrail_function=homework_guardrail),
- ],
-)
-
-async def main():
- result = await Runner.run(triage_agent, "what is life")
- print(result.final_output)
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-## View your traces
-
-To review what happened during your agent run, navigate to the [Trace viewer in the OpenAI Dashboard](https://platform.openai.com/traces) to view traces of your agent runs.
-
-## Next steps
-
-Learn how to build more complex agentic flows:
-
-- Learn about how to configure [Agents](agents.md).
-- Learn about [running agents](running_agents.md).
-- Learn about [tools](tools.md), [guardrails](guardrails.md) and [models](models.md).
diff --git a/tests/docs/ref/agent.md b/tests/docs/ref/agent.md
deleted file mode 100644
index 9f8b10d2..00000000
--- a/tests/docs/ref/agent.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Agents`
-
-::: agents.agent
diff --git a/tests/docs/ref/agent_output.md b/tests/docs/ref/agent_output.md
deleted file mode 100644
index e453de03..00000000
--- a/tests/docs/ref/agent_output.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Agent output`
-
-::: agents.agent_output
diff --git a/tests/docs/ref/exceptions.md b/tests/docs/ref/exceptions.md
deleted file mode 100644
index 7c1a2547..00000000
--- a/tests/docs/ref/exceptions.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Exceptions`
-
-::: agents.exceptions
diff --git a/tests/docs/ref/extensions/handoff_filters.md b/tests/docs/ref/extensions/handoff_filters.md
deleted file mode 100644
index 0ffcb13c..00000000
--- a/tests/docs/ref/extensions/handoff_filters.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Handoff filters`
-
-::: agents.extensions.handoff_filters
diff --git a/tests/docs/ref/extensions/handoff_prompt.md b/tests/docs/ref/extensions/handoff_prompt.md
deleted file mode 100644
index ca800765..00000000
--- a/tests/docs/ref/extensions/handoff_prompt.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# `Handoff prompt`
-
-::: agents.extensions.handoff_prompt
-
- options:
- members:
- - RECOMMENDED_PROMPT_PREFIX
- - prompt_with_handoff_instructions
diff --git a/tests/docs/ref/function_schema.md b/tests/docs/ref/function_schema.md
deleted file mode 100644
index 06aac2a6..00000000
--- a/tests/docs/ref/function_schema.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Function schema`
-
-::: agents.function_schema
diff --git a/tests/docs/ref/guardrail.md b/tests/docs/ref/guardrail.md
deleted file mode 100644
index 17ec929c..00000000
--- a/tests/docs/ref/guardrail.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Guardrails`
-
-::: agents.guardrail
diff --git a/tests/docs/ref/handoffs.md b/tests/docs/ref/handoffs.md
deleted file mode 100644
index 717a9181..00000000
--- a/tests/docs/ref/handoffs.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Handoffs`
-
-::: agents.handoffs
diff --git a/tests/docs/ref/index.md b/tests/docs/ref/index.md
deleted file mode 100644
index 1b8439fa..00000000
--- a/tests/docs/ref/index.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Agents module
-
-::: agents
-
- options:
- members:
- - set_default_openai_key
- - set_default_openai_client
- - set_default_openai_api
- - set_tracing_export_api_key
- - set_tracing_disabled
- - set_trace_processors
- - enable_verbose_stdout_logging
diff --git a/tests/docs/ref/items.md b/tests/docs/ref/items.md
deleted file mode 100644
index 29279e15..00000000
--- a/tests/docs/ref/items.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Items`
-
-::: agents.items
diff --git a/tests/docs/ref/lifecycle.md b/tests/docs/ref/lifecycle.md
deleted file mode 100644
index 432af147..00000000
--- a/tests/docs/ref/lifecycle.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# `Lifecycle`
-
-::: agents.lifecycle
-
- options:
- show_source: false
diff --git a/tests/docs/ref/model_settings.md b/tests/docs/ref/model_settings.md
deleted file mode 100644
index f7f411f0..00000000
--- a/tests/docs/ref/model_settings.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Model settings`
-
-::: agents.model_settings
diff --git a/tests/docs/ref/models/interface.md b/tests/docs/ref/models/interface.md
deleted file mode 100644
index e7bd89a8..00000000
--- a/tests/docs/ref/models/interface.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Model interface`
-
-::: agents.models.interface
diff --git a/tests/docs/ref/models/openai_chatcompletions.md b/tests/docs/ref/models/openai_chatcompletions.md
deleted file mode 100644
index 76cf5633..00000000
--- a/tests/docs/ref/models/openai_chatcompletions.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `OpenAI Chat Completions model`
-
-::: agents.models.openai_chatcompletions
diff --git a/tests/docs/ref/models/openai_responses.md b/tests/docs/ref/models/openai_responses.md
deleted file mode 100644
index e1794bae..00000000
--- a/tests/docs/ref/models/openai_responses.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `OpenAI Responses model`
-
-::: agents.models.openai_responses
diff --git a/tests/docs/ref/result.md b/tests/docs/ref/result.md
deleted file mode 100644
index 3a9e4a9b..00000000
--- a/tests/docs/ref/result.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Results`
-
-::: agents.result
diff --git a/tests/docs/ref/run.md b/tests/docs/ref/run.md
deleted file mode 100644
index ddf4475f..00000000
--- a/tests/docs/ref/run.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# `Runner`
-
-::: agents.run
-
- options:
- members:
- - Runner
- - RunConfig
diff --git a/tests/docs/ref/run_context.md b/tests/docs/ref/run_context.md
deleted file mode 100644
index 49e87305..00000000
--- a/tests/docs/ref/run_context.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Run context`
-
-::: agents.run_context
diff --git a/tests/docs/ref/stream_events.md b/tests/docs/ref/stream_events.md
deleted file mode 100644
index ea484317..00000000
--- a/tests/docs/ref/stream_events.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Streaming events`
-
-::: agents.stream_events
diff --git a/tests/docs/ref/tool.md b/tests/docs/ref/tool.md
deleted file mode 100644
index 887bef75..00000000
--- a/tests/docs/ref/tool.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Tools`
-
-::: agents.tool
diff --git a/tests/docs/ref/tracing/create.md b/tests/docs/ref/tracing/create.md
deleted file mode 100644
index c983e336..00000000
--- a/tests/docs/ref/tracing/create.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Creating traces/spans`
-
-::: agents.tracing.create
diff --git a/tests/docs/ref/tracing/index.md b/tests/docs/ref/tracing/index.md
deleted file mode 100644
index 88a0fe61..00000000
--- a/tests/docs/ref/tracing/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Tracing module
-
-::: agents.tracing
diff --git a/tests/docs/ref/tracing/processor_interface.md b/tests/docs/ref/tracing/processor_interface.md
deleted file mode 100644
index 9fb04e86..00000000
--- a/tests/docs/ref/tracing/processor_interface.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Processor interface`
-
-::: agents.tracing.processor_interface
diff --git a/tests/docs/ref/tracing/processors.md b/tests/docs/ref/tracing/processors.md
deleted file mode 100644
index d7ac4af1..00000000
--- a/tests/docs/ref/tracing/processors.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Processors`
-
-::: agents.tracing.processors
diff --git a/tests/docs/ref/tracing/scope.md b/tests/docs/ref/tracing/scope.md
deleted file mode 100644
index 7b5b9fdf..00000000
--- a/tests/docs/ref/tracing/scope.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Scope`
-
-::: agents.tracing.scope
diff --git a/tests/docs/ref/tracing/setup.md b/tests/docs/ref/tracing/setup.md
deleted file mode 100644
index 1dc6a0fe..00000000
--- a/tests/docs/ref/tracing/setup.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Setup`
-
-::: agents.tracing.setup
diff --git a/tests/docs/ref/tracing/span_data.md b/tests/docs/ref/tracing/span_data.md
deleted file mode 100644
index 6ace7a88..00000000
--- a/tests/docs/ref/tracing/span_data.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Span data`
-
-::: agents.tracing.span_data
diff --git a/tests/docs/ref/tracing/spans.md b/tests/docs/ref/tracing/spans.md
deleted file mode 100644
index 9071707c..00000000
--- a/tests/docs/ref/tracing/spans.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# `Spans`
-
-::: agents.tracing.spans
-
- options:
- members:
- - Span
- - NoOpSpan
- - SpanImpl
diff --git a/tests/docs/ref/tracing/traces.md b/tests/docs/ref/tracing/traces.md
deleted file mode 100644
index 0b7377f9..00000000
--- a/tests/docs/ref/tracing/traces.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Traces`
-
-::: agents.tracing.traces
diff --git a/tests/docs/ref/tracing/util.md b/tests/docs/ref/tracing/util.md
deleted file mode 100644
index 2be3d58c..00000000
--- a/tests/docs/ref/tracing/util.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Util`
-
-::: agents.tracing.util
diff --git a/tests/docs/ref/usage.md b/tests/docs/ref/usage.md
deleted file mode 100644
index b8b29db5..00000000
--- a/tests/docs/ref/usage.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `Usage`
-
-::: agents.usage
diff --git a/tests/docs/results.md b/tests/docs/results.md
deleted file mode 100644
index d1864fa8..00000000
--- a/tests/docs/results.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# Results
-
-When you call the `Runner.run` methods, you either get a:
-
-- [`RunResult`][agents.result.RunResult] if you call `run` or `run_sync`
-- [`RunResultStreaming`][agents.result.RunResultStreaming] if you call `run_streamed`
-
-Both of these inherit from [`RunResultBase`][agents.result.RunResultBase], which is where most of the useful information lives.
-
-## Final output
-
-The [`final_output`][agents.result.RunResultBase.final_output] property contains the final output of the last agent that ran. This is either:
-
-- a `str`, if the last agent didn't have an `output_type` defined
-- an object of type `last_agent.output_type`, if the agent had an output type defined.
-
-!!! note
-
- `final_output` is of type `Any`. We can't statically type this, because of handoffs. If handoffs occur, that means any Agent might be the last agent, so we don't statically know the set of possible output types.
-
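-For example, a minimal sketch of a typed final output (the `Weather` model here is a hypothetical stand-in for your own schema):
-
-```python
-from pydantic import BaseModel
-
-from agents import Agent, Runner
-
-
-class Weather(BaseModel):
-    city: str
-    temperature_c: float
-
-
-agent = Agent(
-    name="Weather reporter",
-    instructions="Report the weather for the requested city.",
-    output_type=Weather,
-)
-
-
-async def main():
-    result = await Runner.run(agent, "What's the weather in Tokyo?")
-    weather = result.final_output_as(Weather)  # typed accessor; final_output itself is Any
-    print(weather.city, weather.temperature_c)
-```
-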
-## Inputs for the next turn
-
-You can use [`result.to_input_list()`][agents.result.RunResultBase.to_input_list] to turn the result into an input list that concatenates the original input you provided with the items generated during the agent run. This makes it convenient to take the outputs of one agent run and pass them into another run, or to run it in a loop and append new user inputs each time.
-
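-A minimal sketch of that chaining:
-
-```python
-from agents import Agent, Runner
-
-agent = Agent(name="Assistant", instructions="Reply very concisely.")
-
-
-async def main():
-    first = await Runner.run(agent, "What city is the Golden Gate Bridge in?")
-
-    # Concatenate the original input and the generated items, then append a new user message
-    next_input = first.to_input_list() + [{"role": "user", "content": "What state is it in?"}]
-    second = await Runner.run(agent, next_input)
-    print(second.final_output)
-```
-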
-## Last agent
-
-The [`last_agent`][agents.result.RunResultBase.last_agent] property contains the last agent that ran. Depending on your application, this is often useful for the next time the user inputs something. For example, if you have a frontline triage agent that hands off to a language-specific agent, you can store the last agent, and re-use it the next time the user messages the agent.
-
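-As a rough sketch (the agent names are illustrative):
-
-```python
-from agents import Agent, Runner
-
-triage_agent = Agent(name="Triage agent", instructions="Hand off based on the user's language.")
-
-
-async def main():
-    result = await Runner.run(triage_agent, "Bonjour!")
-
-    # Store this (e.g. keyed by conversation ID) and start the next turn with it,
-    # so the user keeps talking to the language-specific agent.
-    next_agent = result.last_agent
-    followup = await Runner.run(next_agent, "Merci!")
-    print(followup.final_output)
-```
-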
-## New items
-
-The [`new_items`][agents.result.RunResultBase.new_items] property contains the new items generated during the run. The items are [`RunItem`][agents.items.RunItem]s. A run item wraps the raw item generated by the LLM. The sketch after this list shows one way to inspect them.
-
-- [`MessageOutputItem`][agents.items.MessageOutputItem] indicates a message from the LLM. The raw item is the message generated.
-- [`HandoffCallItem`][agents.items.HandoffCallItem] indicates that the LLM called the handoff tool. The raw item is the tool call item from the LLM.
-- [`HandoffOutputItem`][agents.items.HandoffOutputItem] indicates that a handoff occurred. The raw item is the tool response to the handoff tool call. You can also access the source/target agents from the item.
-- [`ToolCallItem`][agents.items.ToolCallItem] indicates that the LLM invoked a tool.
-- [`ToolCallOutputItem`][agents.items.ToolCallOutputItem] indicates that a tool call produced output. The raw item is the tool response. You can also access the tool output from the item.
-- [`ReasoningItem`][agents.items.ReasoningItem] indicates a reasoning item from the LLM. The raw item is the reasoning generated.
-
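-For example, a rough sketch that inspects the items from a finished run (assuming `result` is a `RunResult`):
-
-```python
-from agents import ItemHelpers
-from agents.items import MessageOutputItem, ToolCallItem, ToolCallOutputItem
-
-for item in result.new_items:
-    if isinstance(item, MessageOutputItem):
-        print("Message:", ItemHelpers.text_message_output(item))
-    elif isinstance(item, ToolCallItem):
-        print("A tool was called")
-    elif isinstance(item, ToolCallOutputItem):
-        print("Tool output:", item.output)
-```
-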
-## Other information
-
-### Guardrail results
-
-The [`input_guardrail_results`][agents.result.RunResultBase.input_guardrail_results] and [`output_guardrail_results`][agents.result.RunResultBase.output_guardrail_results] properties contain the results of the guardrails, if any. Guardrail results can sometimes contain useful information you want to log or store, so we make these available to you.
-
-### Raw responses
-
-The [`raw_responses`][agents.result.RunResultBase.raw_responses] property contains the [`ModelResponse`][agents.items.ModelResponse]s generated by the LLM.
-
-### Original input
-
-The [`input`][agents.result.RunResultBase.input] property contains the original input you provided to the `run` method. In most cases you won't need this, but it's available in case you do.
diff --git a/tests/docs/running_agents.md b/tests/docs/running_agents.md
deleted file mode 100644
index a2f137cf..00000000
--- a/tests/docs/running_agents.md
+++ /dev/null
@@ -1,95 +0,0 @@
-# Running agents
-
-You can run agents via the [`Runner`][agents.run.Runner] class. You have 3 options:
-
-1. [`Runner.run()`][agents.run.Runner.run], which runs async and returns a [`RunResult`][agents.result.RunResult].
-2. [`Runner.run_sync()`][agents.run.Runner.run_sync], which is a sync method and just runs `.run()` under the hood.
-3. [`Runner.run_streamed()`][agents.run.Runner.run_streamed], which runs async and returns a [`RunResultStreaming`][agents.result.RunResultStreaming]. It calls the LLM in streaming mode, and streams those events to you as they are received.
-
-```python
-from agents import Agent, Runner
-
-async def main():
- agent = Agent(name="Assistant", instructions="You are a helpful assistant")
-
- result = await Runner.run(agent, "Write a haiku about recursion in programming.")
- print(result.final_output)
- # Code within the code,
- # Functions calling themselves,
- # Infinite loop's dance.
-```
-
-Read more in the [results guide](results.md).
-
-## The agent loop
-
-When you use the run method in `Runner`, you pass in a starting agent and input. The input can either be a string (which is considered a user message), or a list of input items, which are item types from the OpenAI Responses API.
-
-The runner then runs a loop:
-
-1. We call the LLM for the current agent, with the current input.
-2. The LLM produces its output.
- 1. If the LLM returns a `final_output`, the loop ends and we return the result.
- 2. If the LLM does a handoff, we update the current agent and input, and re-run the loop.
- 3. If the LLM produces tool calls, we run those tool calls, append the results, and re-run the loop.
-3. If we exceed the `max_turns` passed, we raise a [`MaxTurnsExceeded`][agents.exceptions.MaxTurnsExceeded] exception.
-
-!!! note
-
-    The rule for whether the LLM output is considered a "final output" is that it produces text output of the desired type, with no tool calls.
-
-## Streaming
-
-Streaming lets you receive streaming events as the LLM runs. Once the stream is done, the [`RunResultStreaming`][agents.result.RunResultStreaming] will contain the complete information about the run, including all the new outputs produced. You can call `.stream_events()` for the streaming events. Read more in the [streaming guide](streaming.md).
-
-## Run config
-
-The `run_config` parameter lets you configure some global settings for the agent run (a sketch follows this list):
-
-- [`model`][agents.run.RunConfig.model]: Allows setting a global LLM model to use, irrespective of what `model` each Agent has.
-- [`model_provider`][agents.run.RunConfig.model_provider]: A model provider for looking up model names, which defaults to OpenAI.
-- [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`.
-- [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs.
-- [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details.
-- [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run.
-- [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs.
-- [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs.
-- [`trace_metadata`][agents.run.RunConfig.trace_metadata]: Metadata to include on all traces.
-
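-As a rough sketch (the model and workflow names are illustrative):
-
-```python
-from agents import Agent, Runner
-from agents.run import RunConfig
-
-agent = Agent(name="Assistant", instructions="You are a helpful assistant.")
-
-
-async def main():
-    result = await Runner.run(
-        agent,
-        "Hello",
-        run_config=RunConfig(
-            model="gpt-4o",
-            workflow_name="Support flow",
-            trace_metadata={"env": "dev"},
-        ),
-    )
-    print(result.final_output)
-```
-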
-## Conversations/chat threads
-
-Calling any of the run methods can result in one or more agents running (and hence one or more LLM calls), but it represents a single logical turn in a chat conversation. For example:
-
-1. User turn: user enters text
-2. Runner run: first agent calls LLM, runs tools, does a handoff to a second agent, second agent runs more tools, and then produces an output.
-
-At the end of the agent run, you can choose what to show to the user. For example, you might show the user every new item generated by the agents, or just the final output. Either way, the user might then ask a followup question, in which case you can call the run method again.
-
-You can use the base [`RunResultBase.to_input_list()`][agents.result.RunResultBase.to_input_list] method to get the inputs for the next turn.
-
-```python
-from agents import Agent, Runner, trace
-
-async def main():
- agent = Agent(name="Assistant", instructions="Reply very concisely.")
-
-    thread_id = "thread_123"  # Illustrative: an ID from your chat app
-    with trace(workflow_name="Conversation", group_id=thread_id):
- # First turn
- result = await Runner.run(agent, "What city is the Golden Gate Bridge in?")
- print(result.final_output)
- # San Francisco
-
- # Second turn
-        new_input = result.to_input_list() + [{"role": "user", "content": "What state is it in?"}]
- result = await Runner.run(agent, new_input)
- print(result.final_output)
- # California
-```
-
-## Exceptions
-
-The SDK raises exceptions in certain cases. The full list is in [`agents.exceptions`][]. As an overview (a handling sketch follows this list):
-
-- [`AgentsException`][agents.exceptions.AgentsException] is the base class for all exceptions raised in the SDK.
-- [`MaxTurnsExceeded`][agents.exceptions.MaxTurnsExceeded] is raised when the run exceeds the `max_turns` passed to the run methods.
-- [`ModelBehaviorError`][agents.exceptions.ModelBehaviorError] is raised when the model produces invalid outputs, e.g. malformed JSON or using non-existent tools.
-- [`UserError`][agents.exceptions.UserError] is raised when you (the person writing code using the SDK) make an error using the SDK.
-- [`InputGuardrailTripwireTriggered`][agents.exceptions.InputGuardrailTripwireTriggered], [`OutputGuardrailTripwireTriggered`][agents.exceptions.OutputGuardrailTripwireTriggered] are raised when a [guardrail](guardrails.md) is tripped.
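-
-For example, a minimal handling sketch:
-
-```python
-from agents import Agent, Runner
-from agents.exceptions import InputGuardrailTripwireTriggered, MaxTurnsExceeded
-
-agent = Agent(name="Assistant", instructions="You are a helpful assistant.")
-
-
-async def main():
-    try:
-        result = await Runner.run(agent, "Hello", max_turns=3)
-        print(result.final_output)
-    except MaxTurnsExceeded:
-        print("The run took too many turns.")
-    except InputGuardrailTripwireTriggered:
-        print("An input guardrail tripped.")
-```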
diff --git a/tests/docs/streaming.md b/tests/docs/streaming.md
deleted file mode 100644
index b2c7c095..00000000
--- a/tests/docs/streaming.md
+++ /dev/null
@@ -1,87 +0,0 @@
-# Streaming
-
-Streaming lets you subscribe to updates of the agent run as it proceeds. This can be useful for showing the end-user progress updates and partial responses.
-
-To stream, you can call [`Runner.run_streamed()`][agents.run.Runner.run_streamed], which will give you a [`RunResultStreaming`][agents.result.RunResultStreaming]. Calling `result.stream_events()` gives you an async stream of [`StreamEvent`][agents.stream_events.StreamEvent] objects, which are described below.
-
-## Raw response events
-
-[`RawResponsesStreamEvent`][agents.stream_events.RawResponsesStreamEvent] are raw events passed directly from the LLM. They are in OpenAI Responses API format, which means each event has a type (like `response.created`, `response.output_text.delta`, etc) and data. These events are useful if you want to stream response messages to the user as soon as they are generated.
-
-For example, this will output the text generated by the LLM token-by-token.
-
-```python
-import asyncio
-from openai.types.responses import ResponseTextDeltaEvent
-from agents import Agent, Runner
-
-async def main():
- agent = Agent(
- name="Joker",
- instructions="You are a helpful assistant.",
- )
-
- result = Runner.run_streamed(agent, input="Please tell me 5 jokes.")
- async for event in result.stream_events():
- if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
- print(event.data.delta, end="", flush=True)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-## Run item events and agent events
-
-[`RunItemStreamEvent`][agents.stream_events.RunItemStreamEvent]s are higher level events. They inform you when an item has been fully generated. This allows you to push progress updates at the level of "message generated", "tool ran", etc, instead of each token. Similarly, [`AgentUpdatedStreamEvent`][agents.stream_events.AgentUpdatedStreamEvent] gives you updates when the current agent changes (e.g. as the result of a handoff).
-
-For example, this will ignore raw events and stream updates to the user.
-
-```python
-import asyncio
-import random
-from agents import Agent, ItemHelpers, Runner, function_tool
-
-@function_tool
-def how_many_jokes() -> int:
- return random.randint(1, 10)
-
-
-async def main():
- agent = Agent(
- name="Joker",
- instructions="First call the `how_many_jokes` tool, then tell that many jokes.",
- tools=[how_many_jokes],
- )
-
- result = Runner.run_streamed(
- agent,
- input="Hello",
- )
- print("=== Run starting ===")
-
- async for event in result.stream_events():
- # We'll ignore the raw responses event deltas
- if event.type == "raw_response_event":
- continue
- # When the agent updates, print that
- elif event.type == "agent_updated_stream_event":
- print(f"Agent updated: {event.new_agent.name}")
- continue
- # When items are generated, print them
- elif event.type == "run_item_stream_event":
- if event.item.type == "tool_call_item":
- print("-- Tool was called")
- elif event.item.type == "tool_call_output_item":
- print(f"-- Tool output: {event.item.output}")
- elif event.item.type == "message_output_item":
- print(f"-- Message output:\n {ItemHelpers.text_message_output(event.item)}")
- else:
- pass # Ignore other event types
-
- print("=== Run complete ===")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
diff --git a/tests/docs/stylesheets/extra.css b/tests/docs/stylesheets/extra.css
deleted file mode 100644
index 89cf164b..00000000
--- a/tests/docs/stylesheets/extra.css
+++ /dev/null
@@ -1,194 +0,0 @@
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: normal;
- font-weight: 400;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-Regular.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: italic;
- font-weight: 400;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-RegularItalic.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: normal;
- font-weight: 500;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-Medium.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: italic;
- font-weight: 500;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-MediumItalic.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: normal;
- font-weight: 600;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-Semibold.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: italic;
- font-weight: 600;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-SemiboldItalic.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: normal;
- font-weight: 700;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-Bold.woff2")
- format("woff2");
-}
-
-@font-face {
- font-display: swap;
- font-family: "OpenAI Sans";
- font-style: italic;
- font-weight: 700;
- src: url("https://cdn.openai.com/common/fonts/openai-sans/OpenAISans-BoldItalic.woff2")
- format("woff2");
-}
-
-/*
- Root variables that apply to all color schemes.
- Material for MkDocs automatically switches data-md-color-scheme
- between "default" (light) and "slate" (dark) when you use the toggles.
-*/
-:root {
- /* Font families */
- --md-text-font: "OpenAI Sans", -apple-system, system-ui, Helvetica, Arial,
- sans-serif;
- --md-typeface-heading: "OpenAI Sans", -apple-system, system-ui, Helvetica,
- Arial, sans-serif;
-
- /* Global color variables */
- --md-default-fg-color: #212121;
- --md-default-bg-color: #ffffff;
- --md-primary-fg-color: #000;
- --md-accent-fg-color: #000;
-
- /* Code block theming */
- --md-code-fg-color: red;
- --md-code-bg-color: #f5f5f5;
-
- /* Tables, blockquotes, etc. */
- --md-table-row-border-color: #e0e0e0;
- --md-admonition-bg-color: #f8f8f8;
- --md-admonition-title-fg-color: #373737;
- --md-default-fg-color--light: #000;
-
- --md-typeset-a-color: #000;
- --md-accent-fg-color: #000;
-
- --md-code-fg-color: #000;
-}
-
-/* Header styling */
-.md-header {
- background-color: #000;
-}
-
-.md-header--shadow {
- box-shadow: none;
-}
-
-.md-content .md-typeset h1 {
- color: #000;
-}
-
-.md-typeset p,
-.md-typeset li {
- font-size: 16px;
-}
-
-.md-typeset__table p {
- line-height: 1em;
-}
-
-.md-nav {
- font-size: 14px;
-}
-.md-nav__title {
- color: #000;
- font-weight: 600;
-}
-
-.md-typeset h1,
-.md-typeset h2,
-.md-typeset h3,
-.md-typeset h4 {
- font-weight: 600;
-}
-
-.md-typeset h1 code {
- color: #000;
- padding: 0;
- background-color: transparent;
-}
-.md-footer {
- display: none;
-}
-
-.md-header__title {
- margin-left: 0 !important;
-}
-
-.md-typeset .admonition,
-.md-typeset details {
- border: none;
- outline: none;
- border-radius: 8px;
- overflow: hidden;
-}
-
-.md-typeset pre > code {
- font-size: 14px;
-}
-
-.md-typeset__table code {
- font-size: 14px;
-}
-
-/* Custom link styling */
-.md-content a {
- text-decoration: none;
-}
-
-.md-content a:hover {
- text-decoration: underline;
-}
-
-/* Code block styling */
-.md-content .md-code__content {
- border-radius: 8px;
-}
-
-.md-clipboard.md-icon {
- color: #9e9e9e;
-}
-
-/* Reset scrollbar styling to browser default with high priority */
-.md-sidebar__scrollwrap {
- scrollbar-color: auto !important;
-}
diff --git a/tests/docs/tools.md b/tests/docs/tools.md
deleted file mode 100644
index f7a88691..00000000
--- a/tests/docs/tools.md
+++ /dev/null
@@ -1,270 +0,0 @@
-# Tools
-
-Tools let agents take actions: things like fetching data, running code, calling external APIs, and even using a computer. There are three classes of tools in the Agents SDK:
-
-- Hosted tools: these run on LLM servers alongside the AI models. OpenAI offers retrieval, web search and computer use as hosted tools.
-- Function calling: these allow you to use any Python function as a tool.
-- Agents as tools: this allows you to use an agent as a tool, allowing agents to call other agents without handing off to them.
-
-## Hosted tools
-
-OpenAI offers a few built-in tools when using the [`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel]:
-
-- The [`WebSearchTool`][agents.tool.WebSearchTool] lets an agent search the web.
-- The [`FileSearchTool`][agents.tool.FileSearchTool] allows retrieving information from your OpenAI Vector Stores.
-- The [`ComputerTool`][agents.tool.ComputerTool] allows automating computer use tasks.
-
-```python
-from agents import Agent, FileSearchTool, Runner, WebSearchTool
-
-agent = Agent(
- name="Assistant",
- tools=[
- WebSearchTool(),
- FileSearchTool(
- max_num_results=3,
- vector_store_ids=["VECTOR_STORE_ID"],
- ),
- ],
-)
-
-async def main():
- result = await Runner.run(agent, "Which coffee shop should I go to, taking into account my preferences and the weather today in SF?")
- print(result.final_output)
-```
-
-## Function tools
-
-You can use any Python function as a tool. The Agents SDK will set up the tool automatically:
-
-- The name of the tool will be the name of the Python function (or you can provide a name)
-- Tool description will be taken from the docstring of the function (or you can provide a description)
-- The schema for the function inputs is automatically created from the function's arguments
-- Descriptions for each input are taken from the docstring of the function, unless disabled
-
-We use Python's `inspect` module to extract the function signature, along with [`griffe`](https://mkdocstrings.github.io/griffe/) to parse docstrings and `pydantic` for schema creation.
-
-```python
-import json
-
-from typing_extensions import TypedDict, Any
-
-from agents import Agent, FunctionTool, RunContextWrapper, function_tool
-
-
-class Location(TypedDict):
- lat: float
- long: float
-
-@function_tool # (1)!
-async def fetch_weather(location: Location) -> str:
- # (2)!
- """Fetch the weather for a given location.
-
- Args:
- location: The location to fetch the weather for.
- """
- # In real life, we'd fetch the weather from a weather API
- return "sunny"
-
-
-@function_tool(name_override="fetch_data") # (3)!
-def read_file(ctx: RunContextWrapper[Any], path: str, directory: str | None = None) -> str:
- """Read the contents of a file.
-
- Args:
- path: The path to the file to read.
- directory: The directory to read the file from.
- """
- # In real life, we'd read the file from the file system
- return ""
-
-
-agent = Agent(
- name="Assistant",
- tools=[fetch_weather, read_file], # (4)!
-)
-
-for tool in agent.tools:
- if isinstance(tool, FunctionTool):
- print(tool.name)
- print(tool.description)
- print(json.dumps(tool.params_json_schema, indent=2))
- print()
-
-```
-
-1. You can use any Python types as arguments to your functions, and the function can be sync or async.
-2. Docstrings, if present, are used to capture the tool description and argument descriptions
-3. Functions can optionally take the `context` (must be the first argument). You can also set overrides, like the name of the tool, description, which docstring style to use, etc.
-4. You can pass the decorated functions to the list of tools.
-
-??? note "Expand to see output"
-
- ```
- fetch_weather
- Fetch the weather for a given location.
- {
- "$defs": {
- "Location": {
- "properties": {
- "lat": {
- "title": "Lat",
- "type": "number"
- },
- "long": {
- "title": "Long",
- "type": "number"
- }
- },
- "required": [
- "lat",
- "long"
- ],
- "title": "Location",
- "type": "object"
- }
- },
- "properties": {
- "location": {
- "$ref": "#/$defs/Location",
- "description": "The location to fetch the weather for."
- }
- },
- "required": [
- "location"
- ],
- "title": "fetch_weather_args",
- "type": "object"
- }
-
- fetch_data
- Read the contents of a file.
- {
- "properties": {
- "path": {
- "description": "The path to the file to read.",
- "title": "Path",
- "type": "string"
- },
- "directory": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "default": null,
- "description": "The directory to read the file from.",
- "title": "Directory"
- }
- },
- "required": [
- "path"
- ],
- "title": "fetch_data_args",
- "type": "object"
- }
- ```
-
-### Custom function tools
-
-Sometimes, you don't want to use a Python function as a tool. You can directly create a [`FunctionTool`][agents.tool.FunctionTool] if you prefer. You'll need to provide:
-
-- `name`
-- `description`
-- `params_json_schema`, which is the JSON schema for the arguments
-- `on_invoke_tool`, which is an async function that receives the context and the arguments as a JSON string, and must return the tool output as a string.
-
-```python
-from typing import Any
-
-from pydantic import BaseModel
-
-from agents import RunContextWrapper, FunctionTool
-
-
-
-def do_some_work(data: str) -> str:
- return "done"
-
-
-class FunctionArgs(BaseModel):
- username: str
- age: int
-
-
-async def run_function(ctx: RunContextWrapper[Any], args: str) -> str:
- parsed = FunctionArgs.model_validate_json(args)
- return do_some_work(data=f"{parsed.username} is {parsed.age} years old")
-
-
-tool = FunctionTool(
- name="process_user",
- description="Processes extracted user data",
- params_json_schema=FunctionArgs.model_json_schema(),
- on_invoke_tool=run_function,
-)
-```
-
-### Automatic argument and docstring parsing
-
-As mentioned before, we automatically parse the function signature to extract the schema for the tool, and we parse the docstring to extract descriptions for the tool and for individual arguments. Some notes on that:
-
-1. The signature parsing is done via the `inspect` module. We use type annotations to understand the types for the arguments, and dynamically build a Pydantic model to represent the overall schema. It supports most types, including Python primitives, Pydantic models, TypedDicts, and more.
-2. We use `griffe` to parse docstrings. Supported docstring formats are `google`, `sphinx` and `numpy`. We attempt to automatically detect the docstring format, but this is best-effort and you can explicitly set it when calling `function_tool`. You can also disable docstring parsing by setting `use_docstring_info` to `False`.
-
-The code for the schema extraction lives in [`agents.function_schema`][].
-
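-For example, a small sketch of opting out of docstring parsing (only `use_docstring_info` is named above; the function itself is illustrative):
-
-```python
-from agents import function_tool
-
-
-@function_tool(use_docstring_info=False)
-def add(a: int, b: int) -> int:
-    """This docstring will not be used for the tool or argument descriptions."""
-    return a + b
-```
-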
-## Agents as tools
-
-In some workflows, you may want a central agent to orchestrate a network of specialized agents, instead of handing off control. You can do this by modeling agents as tools.
-
-```python
-from agents import Agent, Runner
-import asyncio
-
-spanish_agent = Agent(
- name="Spanish agent",
- instructions="You translate the user's message to Spanish",
-)
-
-french_agent = Agent(
- name="French agent",
- instructions="You translate the user's message to French",
-)
-
-orchestrator_agent = Agent(
- name="orchestrator_agent",
- instructions=(
- "You are a translation agent. You use the tools given to you to translate."
- "If asked for multiple translations, you call the relevant tools."
- ),
- tools=[
- spanish_agent.as_tool(
- tool_name="translate_to_spanish",
- tool_description="Translate the user's message to Spanish",
- ),
- french_agent.as_tool(
- tool_name="translate_to_french",
- tool_description="Translate the user's message to French",
- ),
- ],
-)
-
-async def main():
- result = await Runner.run(orchestrator_agent, input="Say 'Hello, how are you?' in Spanish.")
- print(result.final_output)
-```
-
-## Handling errors in function tools
-
-When you create a function tool via `@function_tool`, you can pass a `failure_error_function`. This is a function that provides an error response to the LLM in case the tool call crashes; a sketch follows the list below.
-
-- By default (i.e. if you don't pass anything), it runs a `default_tool_error_function` which tells the LLM an error occurred.
-- If you pass your own error function, it runs that instead, and sends the response to the LLM.
-- If you explicitly pass `None`, then any tool call errors will be re-raised for you to handle. This could be a `ModelBehaviorError` if the model produced invalid JSON, or a `UserError` if your code crashed, etc.
-
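-For example, a rough sketch (we assume the error function receives the context and the raised exception, and returns the string shown to the LLM):
-
-```python
-from typing import Any
-
-from agents import RunContextWrapper, function_tool
-
-
-def friendly_error(ctx: RunContextWrapper[Any], error: Exception) -> str:
-    # Assumed signature: context plus the raised exception, returning the LLM-visible message
-    return f"The tool failed: {error}. Please try different arguments."
-
-
-@function_tool(failure_error_function=friendly_error)
-def divide(a: float, b: float) -> float:
-    """Divide a by b."""
-    return a / b
-```
-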
-If you are manually creating a `FunctionTool` object, then you must handle errors inside the `on_invoke_tool` function.
diff --git a/tests/docs/tracing.md b/tests/docs/tracing.md
deleted file mode 100644
index fbf2ae41..00000000
--- a/tests/docs/tracing.md
+++ /dev/null
@@ -1,95 +0,0 @@
-# Tracing
-
-The Agents SDK includes built-in tracing, collecting a comprehensive record of events during an agent run: LLM generations, tool calls, handoffs, guardrails, and even custom events that occur. Using the [Traces dashboard](https://platform.openai.com/traces), you can debug, visualize, and monitor your workflows during development and in production.
-
-!!! note
-
- Tracing is enabled by default. There are two ways to disable tracing:
-
- 1. You can globally disable tracing by setting the env var `OPENAI_AGENTS_DISABLE_TRACING=1`
- 2. You can disable tracing for a single run by setting [`agents.run.RunConfig.tracing_disabled`][] to `True`
-
-## Traces and spans
-
-- **Traces** represent a single end-to-end operation of a "workflow". They're composed of Spans. Traces have the following properties:
- - `workflow_name`: This is the logical workflow or app. For example "Code generation" or "Customer service".
- - `trace_id`: A unique ID for the trace. Automatically generated if you don't pass one. Must have the format `trace_<32_alphanumeric>`.
- - `group_id`: Optional group ID, to link multiple traces from the same conversation. For example, you might use a chat thread ID.
- - `disabled`: If True, the trace will not be recorded.
-    - `metadata`: Optional metadata for the trace.
-- **Spans** represent operations that have a start and end time. Spans have:
- - `started_at` and `ended_at` timestamps.
- - `trace_id`, to represent the trace they belong to
- - `parent_id`, which points to the parent Span of this Span (if any)
- - `span_data`, which is information about the Span. For example, `AgentSpanData` contains information about the Agent, `GenerationSpanData` contains information about the LLM generation, etc.
-
-## Default tracing
-
-By default, the SDK traces the following:
-
-- The entire `Runner.{run, run_sync, run_streamed}()` is wrapped in a `trace()`.
-- Each time an agent runs, it is wrapped in `agent_span()`
-- LLM generations are wrapped in `generation_span()`
-- Function tool calls are each wrapped in `function_span()`
-- Guardrails are wrapped in `guardrail_span()`
-- Handoffs are wrapped in `handoff_span()`
-
-By default, the trace is named "Agent trace". You can set this name if you use `trace`, or you can configure the name and other properties with the [`RunConfig`][agents.run.RunConfig].
-
-In addition, you can set up [custom trace processors](#custom-tracing-processors) to push traces to other destinations (as a replacement, or secondary destination).
-
-## Higher level traces
-
-Sometimes, you might want multiple calls to `run()` to be part of a single trace. You can do this by wrapping the entire code in a `trace()`.
-
-```python
-from agents import Agent, Runner, trace
-
-async def main():
- agent = Agent(name="Joke generator", instructions="Tell funny jokes.")
-
- with trace("Joke workflow"): # (1)!
- first_result = await Runner.run(agent, "Tell me a joke")
-        second_result = await Runner.run(agent, f"Rate this joke: {first_result.final_output}")
- print(f"Joke: {first_result.final_output}")
- print(f"Rating: {second_result.final_output}")
-```
-
-1. Because the two calls to `Runner.run` are wrapped in a `with trace()`, the individual runs will be part of the overall trace rather than creating two traces.
-
-## Creating traces
-
-You can use the [`trace()`][agents.tracing.trace] function to create a trace. Traces need to be started and finished. You have two options to do so:
-
-1. **Recommended**: use the trace as a context manager, i.e. `with trace(...) as my_trace`. This will automatically start and end the trace at the right time.
-2. You can also manually call [`trace.start()`][agents.tracing.Trace.start] and [`trace.finish()`][agents.tracing.Trace.finish].
-
-The current trace is tracked via a Python [`contextvar`](https://docs.python.org/3/library/contextvars.html). This means that it works with concurrency automatically. If you manually start/end a trace, you'll need to pass `mark_as_current` and `reset_current` to `start()`/`finish()` to update the current trace.
-
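-A minimal sketch of the manual option, using the parameters above:
-
-```python
-from agents import trace
-
-
-async def main():
-    t = trace("My workflow")
-    t.start(mark_as_current=True)
-    try:
-        ...  # run agents, create spans, etc.
-    finally:
-        t.finish(reset_current=True)
-```
-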
-## Creating spans
-
-You can use the various [`*_span()`][agents.tracing.create] methods to create a span. In general, you don't need to manually create spans. A [`custom_span()`][agents.tracing.custom_span] function is available for tracking custom span information.
-
-Spans are automatically part of the current trace, and are nested under the nearest current span, which is tracked via a Python [`contextvar`](https://docs.python.org/3/library/contextvars.html).
-
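-For example, a sketch that uses `custom_span()` as a context manager (by analogy with `trace()`; treat the usage as an assumption):
-
-```python
-from agents.tracing import custom_span
-
-
-def lookup_account(user_id: str) -> None:
-    with custom_span("lookup_account"):
-        ...  # your custom work is recorded as a span in the current trace
-```
-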
-## Sensitive data
-
-Some spans track potentially sensitive data. For example, the `generation_span()` stores the inputs/outputs of the LLM generation, and `function_span()` stores the inputs/outputs of function calls. These may contain sensitive data, so you can disable capturing that data via [`RunConfig.trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data].
-
-## Custom tracing processors
-
-The high level architecture for tracing is:
-
-- At initialization, we create a global [`TraceProvider`][agents.tracing.setup.TraceProvider], which is responsible for creating traces.
-- We configure the `TraceProvider` with a [`BatchTraceProcessor`][agents.tracing.processors.BatchTraceProcessor] that sends traces/spans in batches to a [`BackendSpanExporter`][agents.tracing.processors.BackendSpanExporter], which exports the spans and traces to the OpenAI backend in batches.
-
-To customize this default setup, for example to send traces to alternative or additional backends or to modify exporter behavior, you have two options:
-
-1. [`add_trace_processor()`][agents.tracing.add_trace_processor] lets you add an **additional** trace processor that will receive traces and spans as they are ready. This lets you do your own processing in addition to sending traces to OpenAI's backend. (See the sketch after this list.)
-2. [`set_trace_processors()`][agents.tracing.set_trace_processors] lets you **replace** the default processors with your own trace processors. This means traces will not be sent to the OpenAI backend unless you include a `TracingProcessor` that does so.
-
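-As a rough sketch of option 1 (the method names follow our reading of the `TracingProcessor` interface; treat them as assumptions):
-
-```python
-from agents.tracing import add_trace_processor
-from agents.tracing.processor_interface import TracingProcessor
-
-
-class ConsoleProcessor(TracingProcessor):
-    def on_trace_start(self, trace) -> None:
-        print(f"Trace started: {trace.trace_id}")
-
-    def on_trace_end(self, trace) -> None:
-        print(f"Trace finished: {trace.trace_id}")
-
-    def on_span_start(self, span) -> None:
-        pass
-
-    def on_span_end(self, span) -> None:
-        pass
-
-    def shutdown(self) -> None:
-        pass
-
-    def force_flush(self) -> None:
-        pass
-
-
-add_trace_processor(ConsoleProcessor())
-```
-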
-External trace processors include:
-
-- [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk)
-- [Pydantic Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents)
-- [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk)
diff --git a/tests/examples/__init__.py b/tests/examples/__init__.py
deleted file mode 100644
index e333a2e3..00000000
--- a/tests/examples/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Make the examples directory into a package to avoid top-level module name collisions.
-# This is needed so that mypy treats files like examples/customer_service/main.py and
-# examples/researcher_app/main.py as distinct modules rather than both named "main".
diff --git a/tests/examples/agent_patterns/README.md b/tests/examples/agent_patterns/README.md
deleted file mode 100644
index 4599b001..00000000
--- a/tests/examples/agent_patterns/README.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# Common agentic patterns
-
-This folder contains examples of different common patterns for agents.
-
-## Deterministic flows
-
-A common tactic is to break down a task into a series of smaller steps. Each task can be performed by an agent, and the output of one agent is used as input to the next. For example, if your task was to generate a story, you could break it down into the following steps:
-
-1. Generate an outline
-2. Generate the story
-3. Generate the ending
-
-Each of these steps can be performed by an agent. The output of one agent is used as input to the next.
-
-See the [`deterministic.py`](./deterministic.py) file for an example of this.
-
-## Handoffs and routing
-
-In many situations, you have specialized sub-agents that handle specific tasks. You can use handoffs to route the task to the right agent.
-
-For example, you might have a frontline agent that receives a request, and then hands off to a specialized agent based on the language of the request.
-See the [`routing.py`](./routing.py) file for an example of this.
-
-## Agents as tools
-
-The mental model for handoffs is that the new agent "takes over". It sees the previous conversation history, and owns the conversation from that point onwards. However, this is not the only way to use agents. You can also use agents as a tool - the tool agent goes off and runs on its own, and then returns the result to the original agent.
-
-For example, you could model the translation task above as tool calls instead: rather than handing over to the language-specific agent, you could call the agent as a tool, and then use the result in the next step. This enables things like translating into multiple languages at once.
-
-See the [`agents_as_tools.py`](./agents_as_tools.py) file for an example of this.
-
-## LLM-as-a-judge
-
-LLMs can often improve the quality of their output if given feedback. A common pattern is to generate a response using a model, and then use a second model to provide feedback. You can even use a small model for the initial generation and a larger model for the feedback, to optimize cost.
-
-For example, you could use an LLM to generate an outline for a story, and then use a second LLM to evaluate the outline and provide feedback. You can then use the feedback to improve the outline, and repeat until the LLM is satisfied with the outline.
-
-See the [`llm_as_a_judge.py`](./llm_as_a_judge.py) file for an example of this.
-
-## Parallelization
-
-Running multiple agents in parallel is a common pattern. This can be useful for latency (e.g. if you have multiple steps that don't depend on each other) and for other reasons, e.g. generating multiple responses and picking the best one.
-
-See the [`parallelization.py`](./parallelization.py) file for an example of this. It runs a translation agent multiple times in parallel, and then picks the best translation.
-
-## Guardrails
-
-Related to parallelization, you often want to run input guardrails to make sure the inputs to your agents are valid. For example, if you have a customer support agent, you might want to make sure that the user isn't trying to ask for help with a math problem.
-
-You can definitely do this without any special Agents SDK features by using parallelization, but we support a special guardrail primitive. Guardrails can have a "tripwire" - if the tripwire is triggered, the agent execution will immediately stop and a `GuardrailTripwireTriggered` exception will be raised.
-
-This is really useful for latency: for example, you might have a very fast model that runs the guardrail and a slow model that runs the actual agent. You wouldn't want to wait for the slow model to finish, so guardrails let you quickly reject invalid inputs.
-
-See the [`guardrails.py`](./guardrails.py) file for an example of this.
diff --git a/tests/examples/agent_patterns/agents_as_tools.py b/tests/examples/agent_patterns/agents_as_tools.py
deleted file mode 100644
index 9fd118ef..00000000
--- a/tests/examples/agent_patterns/agents_as_tools.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import asyncio
-
-from agents import Agent, ItemHelpers, MessageOutputItem, Runner, trace
-
-"""
-This example shows the agents-as-tools pattern. The frontline agent receives a user message and
-then picks which agents to call, as tools. In this case, it picks from a set of translation
-agents.
-"""
-
-spanish_agent = Agent(
- name="spanish_agent",
- instructions="You translate the user's message to Spanish",
- handoff_description="An english to spanish translator",
-)
-
-french_agent = Agent(
- name="french_agent",
- instructions="You translate the user's message to French",
- handoff_description="An english to french translator",
-)
-
-italian_agent = Agent(
- name="italian_agent",
- instructions="You translate the user's message to Italian",
- handoff_description="An english to italian translator",
-)
-
-orchestrator_agent = Agent(
- name="orchestrator_agent",
- instructions=(
- "You are a translation agent. You use the tools given to you to translate."
- "If asked for multiple translations, you call the relevant tools in order."
- "You never translate on your own, you always use the provided tools."
- ),
- tools=[
- spanish_agent.as_tool(
- tool_name="translate_to_spanish",
- tool_description="Translate the user's message to Spanish",
- ),
- french_agent.as_tool(
- tool_name="translate_to_french",
- tool_description="Translate the user's message to French",
- ),
- italian_agent.as_tool(
- tool_name="translate_to_italian",
- tool_description="Translate the user's message to Italian",
- ),
- ],
-)
-
-synthesizer_agent = Agent(
- name="synthesizer_agent",
- instructions="You inspect translations, correct them if needed, and produce a final concatenated response.",
-)
-
-
-async def main():
- msg = input("Hi! What would you like translated, and to which languages? ")
-
- # Run the entire orchestration in a single trace
- with trace("Orchestrator evaluator"):
- orchestrator_result = await Runner.run(orchestrator_agent, msg)
-
- for item in orchestrator_result.new_items:
- if isinstance(item, MessageOutputItem):
- text = ItemHelpers.text_message_output(item)
- if text:
- print(f" - Translation step: {text}")
-
- synthesizer_result = await Runner.run(
- synthesizer_agent, orchestrator_result.to_input_list()
- )
-
- print(f"\n\nFinal response:\n{synthesizer_result.final_output}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/deterministic.py b/tests/examples/agent_patterns/deterministic.py
deleted file mode 100644
index 0c163afe..00000000
--- a/tests/examples/agent_patterns/deterministic.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import asyncio
-
-from pydantic import BaseModel
-
-from agents import Agent, Runner, trace
-
-"""
-This example demonstrates a deterministic flow, where each step is performed by an agent.
-1. The first agent generates a story outline
-2. We feed the outline into the second agent
-3. The second agent checks if the outline is good quality and if it is a scifi story
-4. If the outline is not good quality or not a scifi story, we stop here
-5. If the outline is good quality and a scifi story, we feed the outline into the third agent
-6. The third agent writes the story
-"""
-
-story_outline_agent = Agent(
- name="story_outline_agent",
- instructions="Generate a very short story outline based on the user's input.",
-)
-
-
-class OutlineCheckerOutput(BaseModel):
- good_quality: bool
- is_scifi: bool
-
-
-outline_checker_agent = Agent(
- name="outline_checker_agent",
- instructions="Read the given story outline, and judge the quality. Also, determine if it is a scifi story.",
- output_type=OutlineCheckerOutput,
-)
-
-story_agent = Agent(
- name="story_agent",
- instructions="Write a short story based on the given outline.",
- output_type=str,
-)
-
-
-async def main():
- input_prompt = input("What kind of story do you want? ")
-
- # Ensure the entire workflow is a single trace
- with trace("Deterministic story flow"):
- # 1. Generate an outline
- outline_result = await Runner.run(
- story_outline_agent,
- input_prompt,
- )
- print("Outline generated")
-
- # 2. Check the outline
- outline_checker_result = await Runner.run(
- outline_checker_agent,
- outline_result.final_output,
- )
-
- # 3. Add a gate to stop if the outline is not good quality or not a scifi story
- assert isinstance(outline_checker_result.final_output, OutlineCheckerOutput)
- if not outline_checker_result.final_output.good_quality:
- print("Outline is not good quality, so we stop here.")
- exit(0)
-
- if not outline_checker_result.final_output.is_scifi:
- print("Outline is not a scifi story, so we stop here.")
- exit(0)
-
- print("Outline is good quality and a scifi story, so we continue to write the story.")
-
- # 4. Write the story
- story_result = await Runner.run(
- story_agent,
- outline_result.final_output,
- )
- print(f"Story: {story_result.final_output}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/input_guardrails.py b/tests/examples/agent_patterns/input_guardrails.py
deleted file mode 100644
index 62591886..00000000
--- a/tests/examples/agent_patterns/input_guardrails.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-
-from pydantic import BaseModel
-
-from agents import (
- Agent,
- GuardrailFunctionOutput,
- InputGuardrailTripwireTriggered,
- RunContextWrapper,
- Runner,
- TResponseInputItem,
- input_guardrail,
-)
-
-"""
-This example shows how to use guardrails.
-
-Guardrails are checks that run in parallel to the agent's execution.
-They can be used to do things like:
-- Check if input messages are off-topic
-- Check that output messages don't violate any policies
-- Take over control of the agent's execution if an unexpected input is detected
-
-In this example, we'll set up an input guardrail that trips if the user is asking to do math homework.
-If the guardrail trips, we'll respond with a refusal message.
-"""
-
-
-### 1. An agent-based guardrail that is triggered if the user is asking to do math homework
-class MathHomeworkOutput(BaseModel):
- is_math_homework: bool
- reasoning: str
-
-
-guardrail_agent = Agent(
- name="Guardrail check",
- instructions="Check if the user is asking you to do their math homework.",
- output_type=MathHomeworkOutput,
-)
-
-
-@input_guardrail
-async def math_guardrail(
- context: RunContextWrapper[None], agent: Agent, input: str | list[TResponseInputItem]
-) -> GuardrailFunctionOutput:
- """This is an input guardrail function, which happens to call an agent to check if the input
- is a math homework question.
- """
- result = await Runner.run(guardrail_agent, input, context=context.context)
- final_output = result.final_output_as(MathHomeworkOutput)
-
- return GuardrailFunctionOutput(
- output_info=final_output,
-        tripwire_triggered=final_output.is_math_homework,  # Trip when the input *is* math homework
- )
-
-
-### 2. The run loop
-
-
-async def main():
- agent = Agent(
- name="Customer support agent",
- instructions="You are a customer support agent. You help customers with their questions.",
- input_guardrails=[math_guardrail],
- )
-
- input_data: list[TResponseInputItem] = []
-
- while True:
- user_input = input("Enter a message: ")
- input_data.append(
- {
- "role": "user",
- "content": user_input,
- }
- )
-
- try:
- result = await Runner.run(agent, input_data)
- print(result.final_output)
- # If the guardrail didn't trigger, we use the result as the input for the next run
- input_data = result.to_input_list()
- except InputGuardrailTripwireTriggered:
- # If the guardrail triggered, we instead add a refusal message to the input
- message = "Sorry, I can't help you with your math homework."
- print(message)
- input_data.append(
- {
- "role": "assistant",
- "content": message,
- }
- )
-
- # Sample run:
- # Enter a message: What's the capital of California?
- # The capital of California is Sacramento.
- # Enter a message: Can you help me solve for x: 2x + 5 = 11
- # Sorry, I can't help you with your math homework.
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/llm_as_a_judge.py b/tests/examples/agent_patterns/llm_as_a_judge.py
deleted file mode 100644
index d13a67cb..00000000
--- a/tests/examples/agent_patterns/llm_as_a_judge.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from dataclasses import dataclass
-from typing import Literal
-
-from agents import Agent, ItemHelpers, Runner, TResponseInputItem, trace
-
-"""
-This example shows the LLM as a judge pattern. The first agent generates an outline for a story.
-The second agent judges the outline and provides feedback. We loop until the judge is satisfied
-with the outline.
-"""
-
-story_outline_generator = Agent(
- name="story_outline_generator",
- instructions=(
- "You generate a very short story outline based on the user's input."
- "If there is any feedback provided, use it to improve the outline."
- ),
-)
-
-
-@dataclass
-class EvaluationFeedback:
- score: Literal["pass", "needs_improvement", "fail"]
- feedback: str
-
-
-evaluator = Agent[None](
- name="evaluator",
- instructions=(
- "You evaluate a story outline and decide if it's good enough."
- "If it's not good enough, you provide feedback on what needs to be improved."
- "Never give it a pass on the first try."
- ),
- output_type=EvaluationFeedback,
-)
-
-
-async def main() -> None:
- msg = input("What kind of story would you like to hear? ")
- input_items: list[TResponseInputItem] = [{"content": msg, "role": "user"}]
-
- latest_outline: str | None = None
-
- # We'll run the entire workflow in a single trace
- with trace("LLM as a judge"):
- while True:
- story_outline_result = await Runner.run(
- story_outline_generator,
- input_items,
- )
-
- input_items = story_outline_result.to_input_list()
- latest_outline = ItemHelpers.text_message_outputs(story_outline_result.new_items)
- print("Story outline generated")
-
- evaluator_result = await Runner.run(evaluator, input_items)
- result: EvaluationFeedback = evaluator_result.final_output
-
- print(f"Evaluator score: {result.score}")
-
- if result.score == "pass":
- print("Story outline is good enough, exiting.")
- break
-
- print("Re-running with feedback")
-
- input_items.append({"content": f"Feedback: {result.feedback}", "role": "user"})
-
- print(f"Final story outline: {latest_outline}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/output_guardrails.py b/tests/examples/agent_patterns/output_guardrails.py
deleted file mode 100644
index 526a0852..00000000
--- a/tests/examples/agent_patterns/output_guardrails.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import json
-
-from pydantic import BaseModel, Field
-
-from agents import (
- Agent,
- GuardrailFunctionOutput,
- OutputGuardrailTripwireTriggered,
- RunContextWrapper,
- Runner,
- output_guardrail,
-)
-
-"""
-This example shows how to use output guardrails.
-
-Output guardrails are checks that run on the final output of an agent.
-They can be used to do things like:
-- Check if the output contains sensitive data
-- Check if the output is a valid response to the user's message
-
-In this example, we'll use a (contrived) example where we check if the agent's response contains
-a phone number.
-"""
-
-
-# The agent's output type
-class MessageOutput(BaseModel):
- reasoning: str = Field(description="Thoughts on how to respond to the user's message")
- response: str = Field(description="The response to the user's message")
- user_name: str | None = Field(description="The name of the user who sent the message, if known")
-
-
-@output_guardrail
-async def sensitive_data_check(
- context: RunContextWrapper, agent: Agent, output: MessageOutput
-) -> GuardrailFunctionOutput:
- phone_number_in_response = "650" in output.response
- phone_number_in_reasoning = "650" in output.reasoning
-
- return GuardrailFunctionOutput(
- output_info={
- "phone_number_in_response": phone_number_in_response,
- "phone_number_in_reasoning": phone_number_in_reasoning,
- },
- tripwire_triggered=phone_number_in_response or phone_number_in_reasoning,
- )
-
-
-agent = Agent(
- name="Assistant",
- instructions="You are a helpful assistant.",
- output_type=MessageOutput,
- output_guardrails=[sensitive_data_check],
-)
-
-
-async def main():
- # This should be ok
- await Runner.run(agent, "What's the capital of California?")
- print("First message passed")
-
- # This should trip the guardrail
- try:
- result = await Runner.run(
- agent, "My phone number is 650-123-4567. Where do you think I live?"
- )
- print(
- f"Guardrail didn't trip - this is unexpected. Output: {json.dumps(result.final_output.model_dump(), indent=2)}"
- )
-
- except OutputGuardrailTripwireTriggered as e:
- print(f"Guardrail tripped. Info: {e.guardrail_result.output.output_info}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/parallelization.py b/tests/examples/agent_patterns/parallelization.py
deleted file mode 100644
index fe2a8ecd..00000000
--- a/tests/examples/agent_patterns/parallelization.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import asyncio
-
-from agents import Agent, ItemHelpers, Runner, trace
-
-"""
-This example shows the parallelization pattern. We run the agent three times in parallel, and pick
-the best result.
-"""
-
-spanish_agent = Agent(
- name="spanish_agent",
- instructions="You translate the user's message to Spanish",
-)
-
-translation_picker = Agent(
- name="translation_picker",
- instructions="You pick the best Spanish translation from the given options.",
-)
-
-
-async def main():
- msg = input("Hi! Enter a message, and we'll translate it to Spanish.\n\n")
-
- # Ensure the entire workflow is a single trace
- with trace("Parallel translation"):
- res_1, res_2, res_3 = await asyncio.gather(
- Runner.run(
- spanish_agent,
- msg,
- ),
- Runner.run(
- spanish_agent,
- msg,
- ),
- Runner.run(
- spanish_agent,
- msg,
- ),
- )
-
- outputs = [
- ItemHelpers.text_message_outputs(res_1.new_items),
- ItemHelpers.text_message_outputs(res_2.new_items),
- ItemHelpers.text_message_outputs(res_3.new_items),
- ]
-
- translations = "\n\n".join(outputs)
- print(f"\n\nTranslations:\n\n{translations}")
-
- best_translation = await Runner.run(
- translation_picker,
- f"Input: {msg}\n\nTranslations:\n{translations}",
- )
-
- print("\n\n-----")
-
- print(f"Best translation: {best_translation.final_output}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/agent_patterns/routing.py b/tests/examples/agent_patterns/routing.py
deleted file mode 100644
index 3dcaefa9..00000000
--- a/tests/examples/agent_patterns/routing.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import asyncio
-import uuid
-
-from openai.types.responses import ResponseContentPartDoneEvent, ResponseTextDeltaEvent
-
-from agents import Agent, RawResponsesStreamEvent, Runner, TResponseInputItem, trace
-
-"""
-This example shows the handoffs/routing pattern. The triage agent receives the first message, and
-then hands off to the appropriate agent based on the language of the request. Responses are
-streamed to the user.
-"""
-
-french_agent = Agent(
- name="french_agent",
- instructions="You only speak French",
-)
-
-spanish_agent = Agent(
- name="spanish_agent",
- instructions="You only speak Spanish",
-)
-
-english_agent = Agent(
- name="english_agent",
- instructions="You only speak English",
-)
-
-triage_agent = Agent(
- name="triage_agent",
- instructions="Handoff to the appropriate agent based on the language of the request.",
- handoffs=[french_agent, spanish_agent, english_agent],
-)
-
-
-async def main():
- # We'll create an ID for this conversation, so we can link each trace
-    conversation_id = uuid.uuid4().hex[:16]
-
- msg = input("Hi! We speak French, Spanish and English. How can I help? ")
- agent = triage_agent
- inputs: list[TResponseInputItem] = [{"content": msg, "role": "user"}]
-
- while True:
- # Each conversation turn is a single trace. Normally, each input from the user would be an
- # API request to your app, and you can wrap the request in a trace()
- with trace("Routing example", group_id=conversation_id):
- result = Runner.run_streamed(
- agent,
- input=inputs,
- )
- async for event in result.stream_events():
- if not isinstance(event, RawResponsesStreamEvent):
- continue
- data = event.data
- if isinstance(data, ResponseTextDeltaEvent):
- print(data.delta, end="", flush=True)
- elif isinstance(data, ResponseContentPartDoneEvent):
- print("\n")
-
- inputs = result.to_input_list()
- print("\n")
-
- user_msg = input("Enter a message: ")
- inputs.append({"content": user_msg, "role": "user"})
- agent = result.current_agent
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/basic/agent_lifecycle_example.py b/tests/examples/basic/agent_lifecycle_example.py
deleted file mode 100644
index bc0bbe43..00000000
--- a/tests/examples/basic/agent_lifecycle_example.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import asyncio
-import random
-from typing import Any
-
-from pydantic import BaseModel
-
-from agents import Agent, AgentHooks, RunContextWrapper, Runner, Tool, function_tool
-
-
-class CustomAgentHooks(AgentHooks):
- def __init__(self, display_name: str):
- self.event_counter = 0
- self.display_name = display_name
-
- async def on_start(self, context: RunContextWrapper, agent: Agent) -> None:
- self.event_counter += 1
- print(f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} started")
-
- async def on_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None:
- self.event_counter += 1
- print(
- f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} ended with output {output}"
- )
-
- async def on_handoff(self, context: RunContextWrapper, agent: Agent, source: Agent) -> None:
- self.event_counter += 1
- print(
- f"### ({self.display_name}) {self.event_counter}: Agent {source.name} handed off to {agent.name}"
- )
-
- async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None:
- self.event_counter += 1
- print(
- f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} started tool {tool.name}"
- )
-
- async def on_tool_end(
- self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str
- ) -> None:
- self.event_counter += 1
- print(
- f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} ended tool {tool.name} with result {result}"
- )
-
-
-###
-
-
-@function_tool
-def random_number(max: int) -> int:
- """
- Generate a random number up to the provided maximum.
- """
- return random.randint(0, max)
-
-
-@function_tool
-def multiply_by_two(x: int) -> int:
- """Simple multiplication by two."""
- return x * 2
-
-
-class FinalResult(BaseModel):
- number: int
-
-
-multiply_agent = Agent(
- name="Multiply Agent",
- instructions="Multiply the number by 2 and then return the final result.",
- tools=[multiply_by_two],
- output_type=FinalResult,
- hooks=CustomAgentHooks(display_name="Multiply Agent"),
-)
-
-start_agent = Agent(
- name="Start Agent",
- instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multipler agent.",
- tools=[random_number],
- output_type=FinalResult,
- handoffs=[multiply_agent],
- hooks=CustomAgentHooks(display_name="Start Agent"),
-)
-
-
-async def main() -> None:
- user_input = input("Enter a max number: ")
- await Runner.run(
- start_agent,
- input=f"Generate a random number between 0 and {user_input}.",
- )
-
- print("Done!")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-"""
-$ python examples/basic/agent_lifecycle_example.py
-
-Enter a max number: 250
-### (Start Agent) 1: Agent Start Agent started
-### (Start Agent) 2: Agent Start Agent started tool random_number
-### (Start Agent) 3: Agent Start Agent ended tool random_number with result 37
-### (Start Agent) 4: Agent Start Agent started
-### (Start Agent) 5: Agent Start Agent handed off to Multiply Agent
-### (Multiply Agent) 1: Agent Multiply Agent started
-### (Multiply Agent) 2: Agent Multiply Agent started tool multiply_by_two
-### (Multiply Agent) 3: Agent Multiply Agent ended tool multiply_by_two with result 74
-### (Multiply Agent) 4: Agent Multiply Agent started
-### (Multiply Agent) 5: Agent Multiply Agent ended with output number=74
-Done!
-"""
diff --git a/tests/examples/basic/dynamic_system_prompt.py b/tests/examples/basic/dynamic_system_prompt.py
deleted file mode 100644
index 7bcf90c0..00000000
--- a/tests/examples/basic/dynamic_system_prompt.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import asyncio
-import random
-from typing import Literal
-
-from agents import Agent, RunContextWrapper, Runner
-
-
-class CustomContext:
- def __init__(self, style: Literal["haiku", "pirate", "robot"]):
- self.style = style
-
-
-def custom_instructions(
- run_context: RunContextWrapper[CustomContext], agent: Agent[CustomContext]
-) -> str:
- context = run_context.context
- if context.style == "haiku":
- return "Only respond in haikus."
- elif context.style == "pirate":
- return "Respond as a pirate."
- else:
- return "Respond as a robot and say 'beep boop' a lot."
-
-
-agent = Agent(
- name="Chat agent",
- instructions=custom_instructions,
-)
-
-
-async def main():
- choice: Literal["haiku", "pirate", "robot"] = random.choice(["haiku", "pirate", "robot"])
- context = CustomContext(style=choice)
- print(f"Using style: {choice}\n")
-
- user_message = "Tell me a joke."
- print(f"User: {user_message}")
- result = await Runner.run(agent, user_message, context=context)
-
- print(f"Assistant: {result.final_output}")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-
-"""
-$ python examples/basic/dynamic_system_prompt.py
-
-Using style: haiku
-
-User: Tell me a joke.
-Assistant: Why don't eggs tell jokes?
-They might crack each other's shells,
-leaving yolk on face.
-
-$ python examples/basic/dynamic_system_prompt.py
-Using style: robot
-
-User: Tell me a joke.
-Assistant: Beep boop! Why was the robot so bad at soccer? Beep boop... because it kept kicking up a debug! Beep boop!
-
-$ python examples/basic/dynamic_system_prompt.py
-Using style: pirate
-
-User: Tell me a joke.
-Assistant: Why did the pirate go to school?
-
-To improve his arrr-ticulation! Har har har! 🏴‍☠️
-"""
diff --git a/tests/examples/basic/hello_world.py b/tests/examples/basic/hello_world.py
deleted file mode 100644
index 169290d6..00000000
--- a/tests/examples/basic/hello_world.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import asyncio
-
-from agents import Agent, Runner
-
-
-async def main():
- agent = Agent(
- name="Assistant",
- instructions="You only respond in haikus.",
- )
-
- result = await Runner.run(agent, "Tell me about recursion in programming.")
- print(result.final_output)
- # Function calls itself,
- # Looping in smaller pieces,
- # Endless by design.
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/basic/lifecycle_example.py b/tests/examples/basic/lifecycle_example.py
deleted file mode 100644
index 9b365106..00000000
--- a/tests/examples/basic/lifecycle_example.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import asyncio
-import random
-from typing import Any
-
-from pydantic import BaseModel
-
-from agents import Agent, RunContextWrapper, RunHooks, Runner, Tool, Usage, function_tool
-
-
-class ExampleHooks(RunHooks):
- def __init__(self):
- self.event_counter = 0
-
- def _usage_to_str(self, usage: Usage) -> str:
- return f"{usage.requests} requests, {usage.input_tokens} input tokens, {usage.output_tokens} output tokens, {usage.total_tokens} total tokens"
-
- async def on_agent_start(self, context: RunContextWrapper, agent: Agent) -> None:
- self.event_counter += 1
- print(
- f"### {self.event_counter}: Agent {agent.name} started. Usage: {self._usage_to_str(context.usage)}"
- )
-
- async def on_agent_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None:
- self.event_counter += 1
- print(
- f"### {self.event_counter}: Agent {agent.name} ended with output {output}. Usage: {self._usage_to_str(context.usage)}"
- )
-
- async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None:
- self.event_counter += 1
- print(
- f"### {self.event_counter}: Tool {tool.name} started. Usage: {self._usage_to_str(context.usage)}"
- )
-
- async def on_tool_end(
- self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str
- ) -> None:
- self.event_counter += 1
- print(
- f"### {self.event_counter}: Tool {tool.name} ended with result {result}. Usage: {self._usage_to_str(context.usage)}"
- )
-
- async def on_handoff(
- self, context: RunContextWrapper, from_agent: Agent, to_agent: Agent
- ) -> None:
- self.event_counter += 1
- print(
- f"### {self.event_counter}: Handoff from {from_agent.name} to {to_agent.name}. Usage: {self._usage_to_str(context.usage)}"
- )
-
-
-hooks = ExampleHooks()
-
-###
-
-
-@function_tool
-def random_number(max: int) -> int:
- """Generate a random number up to the provided max."""
- return random.randint(0, max)
-
-
-@function_tool
-def multiply_by_two(x: int) -> int:
- """Return x times two."""
- return x * 2
-
-
-class FinalResult(BaseModel):
- number: int
-
-
-multiply_agent = Agent(
- name="Multiply Agent",
- instructions="Multiply the number by 2 and then return the final result.",
- tools=[multiply_by_two],
- output_type=FinalResult,
-)
-
-start_agent = Agent(
- name="Start Agent",
- instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multipler agent.",
- tools=[random_number],
- output_type=FinalResult,
- handoffs=[multiply_agent],
-)
-
-
-async def main() -> None:
- user_input = input("Enter a max number: ")
- await Runner.run(
- start_agent,
- hooks=hooks,
- input=f"Generate a random number between 0 and {user_input}.",
- )
-
- print("Done!")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-"""
-$ python examples/basic/lifecycle_example.py
-
-Enter a max number: 250
-### 1: Agent Start Agent started. Usage: 0 requests, 0 input tokens, 0 output tokens, 0 total tokens
-### 2: Tool random_number started. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens
-### 3: Tool random_number ended with result 101. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens
-### 4: Agent Start Agent started. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens
-### 5: Handoff from Start Agent to Multiply Agent. Usage: 2 requests, 323 input tokens, 30 output tokens, 353 total tokens
-### 6: Agent Multiply Agent started. Usage: 2 requests, 323 input tokens, 30 output tokens, 353 total tokens
-### 7: Tool multiply_by_two started. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens
-### 8: Tool multiply_by_two ended with result 202. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens
-### 9: Agent Multiply Agent started. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens
-### 10: Agent Multiply Agent ended with output number=202. Usage: 4 requests, 714 input tokens, 63 output tokens, 777 total tokens
-Done!
-
-"""
diff --git a/tests/examples/basic/stream_items.py b/tests/examples/basic/stream_items.py
deleted file mode 100644
index c1f2257a..00000000
--- a/tests/examples/basic/stream_items.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import asyncio
-import random
-
-from agents import Agent, ItemHelpers, Runner, function_tool
-
-
-@function_tool
-def how_many_jokes() -> int:
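-    """Pick a random number of jokes to tell, between 1 and 10."""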
- return random.randint(1, 10)
-
-
-async def main():
- agent = Agent(
- name="Joker",
- instructions="First call the `how_many_jokes` tool, then tell that many jokes.",
- tools=[how_many_jokes],
- )
-
- result = Runner.run_streamed(
- agent,
- input="Hello",
- )
- print("=== Run starting ===")
- async for event in result.stream_events():
- # We'll ignore the raw responses event deltas
- if event.type == "raw_response_event":
- continue
- elif event.type == "agent_updated_stream_event":
- print(f"Agent updated: {event.new_agent.name}")
- continue
- elif event.type == "run_item_stream_event":
- if event.item.type == "tool_call_item":
- print("-- Tool was called")
- elif event.item.type == "tool_call_output_item":
- print(f"-- Tool output: {event.item.output}")
- elif event.item.type == "message_output_item":
- print(f"-- Message output:\n {ItemHelpers.text_message_output(event.item)}")
- else:
- pass # Ignore other event types
-
- print("=== Run complete ===")
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-
- # === Run starting ===
- # Agent updated: Joker
- # -- Tool was called
- # -- Tool output: 4
- # -- Message output:
- # Sure, here are four jokes for you:
-
- # 1. **Why don't skeletons fight each other?**
- # They don't have the guts!
-
- # 2. **What do you call fake spaghetti?**
- # An impasta!
-
- # 3. **Why did the scarecrow win an award?**
- # Because he was outstanding in his field!
-
- # 4. **Why did the bicycle fall over?**
- # Because it was two-tired!
- # === Run complete ===
diff --git a/tests/examples/basic/stream_text.py b/tests/examples/basic/stream_text.py
deleted file mode 100644
index a73c1fee..00000000
--- a/tests/examples/basic/stream_text.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import asyncio
-
-from openai.types.responses import ResponseTextDeltaEvent
-
-from agents import Agent, Runner
-
-
-async def main():
- agent = Agent(
- name="Joker",
- instructions="You are a helpful assistant.",
- )
-
- result = Runner.run_streamed(agent, input="Please tell me 5 jokes.")
- async for event in result.stream_events():
- if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
- print(event.data.delta, end="", flush=True)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/customer_service/main.py b/tests/examples/customer_service/main.py
deleted file mode 100644
index bd802e22..00000000
--- a/tests/examples/customer_service/main.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from __future__ import annotations as _annotations
-
-import asyncio
-import random
-import uuid
-
-from pydantic import BaseModel
-
-from agents import (
- Agent,
- HandoffOutputItem,
- ItemHelpers,
- MessageOutputItem,
- RunContextWrapper,
- Runner,
- ToolCallItem,
- ToolCallOutputItem,
- TResponseInputItem,
- function_tool,
- handoff,
- trace,
-)
-from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX
-
-### CONTEXT
-
-
-class AirlineAgentContext(BaseModel):
- passenger_name: str | None = None
- confirmation_number: str | None = None
- seat_number: str | None = None
- flight_number: str | None = None
-
-
-### TOOLS
-
-
-@function_tool(
- name_override="faq_lookup_tool", description_override="Lookup frequently asked questions."
-)
-async def faq_lookup_tool(question: str) -> str:
- if "bag" in question or "baggage" in question:
- return (
- "You are allowed to bring one bag on the plane. "
- "It must be under 50 pounds and 22 inches x 14 inches x 9 inches."
- )
- elif "seats" in question or "plane" in question:
- return (
- "There are 120 seats on the plane. "
- "There are 22 business class seats and 98 economy seats. "
- "Exit rows are rows 4 and 16. "
- "Rows 5-8 are Economy Plus, with extra legroom. "
- )
- elif "wifi" in question:
- return "We have free wifi on the plane, join Airline-Wifi"
- return "I'm sorry, I don't know the answer to that question."
-
-
-@function_tool
-async def update_seat(
- context: RunContextWrapper[AirlineAgentContext], confirmation_number: str, new_seat: str
-) -> str:
- """
- Update the seat for a given confirmation number.
-
- Args:
- confirmation_number: The confirmation number for the flight.
- new_seat: The new seat to update to.
- """
- # Update the context based on the customer's input
- context.context.confirmation_number = confirmation_number
- context.context.seat_number = new_seat
- # Ensure that the flight number has been set by the incoming handoff
- assert context.context.flight_number is not None, "Flight number is required"
- return f"Updated seat to {new_seat} for confirmation number {confirmation_number}"
-
-
-### HOOKS
-
-
-async def on_seat_booking_handoff(context: RunContextWrapper[AirlineAgentContext]) -> None:
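-    """Seed a flight number into the shared context when triage hands off to seat booking."""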
- flight_number = f"FLT-{random.randint(100, 999)}"
- context.context.flight_number = flight_number
-
-
-### AGENTS
-
-faq_agent = Agent[AirlineAgentContext](
- name="FAQ Agent",
- handoff_description="A helpful agent that can answer questions about the airline.",
- instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
-    You are an FAQ agent. If you are speaking to a customer, you were probably transferred from the triage agent.
- Use the following routine to support the customer.
- # Routine
- 1. Identify the last question asked by the customer.
- 2. Use the faq lookup tool to answer the question. Do not rely on your own knowledge.
- 3. If you cannot answer the question, transfer back to the triage agent.""",
- tools=[faq_lookup_tool],
-)
-
-seat_booking_agent = Agent[AirlineAgentContext](
- name="Seat Booking Agent",
- handoff_description="A helpful agent that can update a seat on a flight.",
- instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
-    You are a seat booking agent. If you are speaking to a customer, you were probably transferred from the triage agent.
- Use the following routine to support the customer.
- # Routine
- 1. Ask for their confirmation number.
- 2. Ask the customer what their desired seat number is.
- 3. Use the update seat tool to update the seat on the flight.
- If the customer asks a question that is not related to the routine, transfer back to the triage agent. """,
- tools=[update_seat],
-)
-
-triage_agent = Agent[AirlineAgentContext](
- name="Triage Agent",
- handoff_description="A triage agent that can delegate a customer's request to the appropriate agent.",
- instructions=(
- f"{RECOMMENDED_PROMPT_PREFIX} "
- "You are a helpful triaging agent. You can use your tools to delegate questions to other appropriate agents."
- ),
- handoffs=[
- faq_agent,
- handoff(agent=seat_booking_agent, on_handoff=on_seat_booking_handoff),
- ],
-)
-
-faq_agent.handoffs.append(triage_agent)
-seat_booking_agent.handoffs.append(triage_agent)
-
-
-### RUN
-
-
-async def main():
- current_agent: Agent[AirlineAgentContext] = triage_agent
- input_items: list[TResponseInputItem] = []
- context = AirlineAgentContext()
-
- # Normally, each input from the user would be an API request to your app, and you can wrap the request in a trace()
- # Here, we'll just use a random UUID for the conversation ID
- conversation_id = uuid.uuid4().hex[:16]
-
- while True:
- user_input = input("Enter your message: ")
- with trace("Customer service", group_id=conversation_id):
- input_items.append({"content": user_input, "role": "user"})
- result = await Runner.run(current_agent, input_items, context=context)
-
- for new_item in result.new_items:
- agent_name = new_item.agent.name
- if isinstance(new_item, MessageOutputItem):
- print(f"{agent_name}: {ItemHelpers.text_message_output(new_item)}")
- elif isinstance(new_item, HandoffOutputItem):
- print(
- f"Handed off from {new_item.source_agent.name} to {new_item.target_agent.name}"
- )
- elif isinstance(new_item, ToolCallItem):
- print(f"{agent_name}: Calling a tool")
- elif isinstance(new_item, ToolCallOutputItem):
- print(f"{agent_name}: Tool call output: {new_item.output}")
- else:
- print(f"{agent_name}: Skipping item: {new_item.__class__.__name__}")
- input_items = result.to_input_list()
- current_agent = result.last_agent
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/handoffs/message_filter.py b/tests/examples/handoffs/message_filter.py
deleted file mode 100644
index 9dd56ef7..00000000
--- a/tests/examples/handoffs/message_filter.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from __future__ import annotations
-
-import json
-import random
-
-from agents import Agent, HandoffInputData, Runner, function_tool, handoff, trace
-from agents.extensions import handoff_filters
-
-
-@function_tool
-def random_number_tool(max: int) -> int:
- """Return a random integer between 0 and the given maximum."""
- return random.randint(0, max)
-
-
-def spanish_handoff_message_filter(handoff_message_data: HandoffInputData) -> HandoffInputData:
- # First, we'll remove any tool-related messages from the message history
- handoff_message_data = handoff_filters.remove_all_tools(handoff_message_data)
-
- # Second, we'll also remove the first two items from the history, just for demonstration
- history = (
- tuple(handoff_message_data.input_history[2:])
- if isinstance(handoff_message_data.input_history, tuple)
- else handoff_message_data.input_history
- )
-
- return HandoffInputData(
- input_history=history,
- pre_handoff_items=tuple(handoff_message_data.pre_handoff_items),
- new_items=tuple(handoff_message_data.new_items),
- )
-
-
-first_agent = Agent(
- name="Assistant",
- instructions="Be extremely concise.",
- tools=[random_number_tool],
-)
-
-spanish_agent = Agent(
- name="Spanish Assistant",
- instructions="You only speak Spanish and are extremely concise.",
- handoff_description="A Spanish-speaking assistant.",
-)
-
-second_agent = Agent(
- name="Assistant",
- instructions=(
- "Be a helpful assistant. If the user speaks Spanish, handoff to the Spanish assistant."
- ),
- handoffs=[handoff(spanish_agent, input_filter=spanish_handoff_message_filter)],
-)
-
-
-async def main():
- # Trace the entire run as a single workflow
- with trace(workflow_name="Message filtering"):
- # 1. Send a regular message to the first agent
- result = await Runner.run(first_agent, input="Hi, my name is Sora.")
-
- print("Step 1 done")
-
-        # 2. Ask the second agent to generate a random number
- result = await Runner.run(
- second_agent,
- input=result.to_input_list()
- + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}],
- )
-
- print("Step 2 done")
-
- # 3. Call the second agent
- result = await Runner.run(
- second_agent,
- input=result.to_input_list()
- + [
- {
- "content": "I live in New York City. Whats the population of the city?",
- "role": "user",
- }
- ],
- )
-
- print("Step 3 done")
-
- # 4. Cause a handoff to occur
- result = await Runner.run(
- second_agent,
- input=result.to_input_list()
- + [
- {
- "content": "Por favor habla en español. ¿Cuál es mi nombre y dónde vivo?",
- "role": "user",
- }
- ],
- )
-
- print("Step 4 done")
-
- print("\n===Final messages===\n")
-
- # 5. That should have caused spanish_handoff_message_filter to be called, which means the
- # output should be missing the first two messages, and have no tool calls.
- # Let's print the messages to see what happened
- for message in result.to_input_list():
- print(json.dumps(message, indent=2))
-
- """
- $python examples/handoffs/message_filter.py
- Step 1 done
- Step 2 done
- Step 3 done
- Step 4 done
-
- ===Final messages===
-
- {
- "content": "Can you generate a random number between 0 and 100?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "Sure! Here's a random number between 0 and 100: **42**.",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- {
- "content": "I live in New York City. Whats the population of the city?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "As of the most recent estimates, the population of New York City is approximately 8.6 million people. However, this number is constantly changing due to various factors such as migration and birth rates. For the latest and most accurate information, it's always a good idea to check the official data from sources like the U.S. Census Bureau.",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- {
- "content": "Por favor habla en espa\u00f1ol. \u00bfCu\u00e1l es mi nombre y d\u00f3nde vivo?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "No tengo acceso a esa informaci\u00f3n personal, solo s\u00e9 lo que me has contado: vives en Nueva York.",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- """
-
-
-if __name__ == "__main__":
- import asyncio
-
- asyncio.run(main())
diff --git a/tests/examples/handoffs/message_filter_streaming.py b/tests/examples/handoffs/message_filter_streaming.py
deleted file mode 100644
index 8d1b4208..00000000
--- a/tests/examples/handoffs/message_filter_streaming.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from __future__ import annotations
-
-import json
-import random
-
-from agents import Agent, HandoffInputData, Runner, function_tool, handoff, trace
-from agents.extensions import handoff_filters
-
-
-@function_tool
-def random_number_tool(max: int) -> int:
- """Return a random integer between 0 and the given maximum."""
- return random.randint(0, max)
-
-
-def spanish_handoff_message_filter(handoff_message_data: HandoffInputData) -> HandoffInputData:
- # First, we'll remove any tool-related messages from the message history
- handoff_message_data = handoff_filters.remove_all_tools(handoff_message_data)
-
- # Second, we'll also remove the first two items from the history, just for demonstration
- history = (
- tuple(handoff_message_data.input_history[2:])
- if isinstance(handoff_message_data.input_history, tuple)
- else handoff_message_data.input_history
- )
-
- return HandoffInputData(
- input_history=history,
- pre_handoff_items=tuple(handoff_message_data.pre_handoff_items),
- new_items=tuple(handoff_message_data.new_items),
- )
-
-
-first_agent = Agent(
- name="Assistant",
- instructions="Be extremely concise.",
- tools=[random_number_tool],
-)
-
-spanish_agent = Agent(
- name="Spanish Assistant",
- instructions="You only speak Spanish and are extremely concise.",
- handoff_description="A Spanish-speaking assistant.",
-)
-
-second_agent = Agent(
- name="Assistant",
- instructions=(
- "Be a helpful assistant. If the user speaks Spanish, handoff to the Spanish assistant."
- ),
- handoffs=[handoff(spanish_agent, input_filter=spanish_handoff_message_filter)],
-)
-
-
-async def main():
- # Trace the entire run as a single workflow
- with trace(workflow_name="Streaming message filter"):
- # 1. Send a regular message to the first agent
- result = await Runner.run(first_agent, input="Hi, my name is Sora.")
-
- print("Step 1 done")
-
-        # 2. Ask the second agent to generate a random number
- result = await Runner.run(
- second_agent,
- input=result.to_input_list()
- + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}],
- )
-
- print("Step 2 done")
-
- # 3. Call the second agent
- result = await Runner.run(
- second_agent,
- input=result.to_input_list()
- + [
- {
- "content": "I live in New York City. Whats the population of the city?",
- "role": "user",
- }
- ],
- )
-
- print("Step 3 done")
-
- # 4. Cause a handoff to occur
- stream_result = Runner.run_streamed(
- second_agent,
- input=result.to_input_list()
- + [
- {
- "content": "Por favor habla en español. ¿Cuál es mi nombre y dónde vivo?",
- "role": "user",
- }
- ],
- )
- async for _ in stream_result.stream_events():
- pass
-
- print("Step 4 done")
-
- print("\n===Final messages===\n")
-
- # 5. That should have caused spanish_handoff_message_filter to be called, which means the
- # output should be missing the first two messages, and have no tool calls.
- # Let's print the messages to see what happened
- for item in stream_result.to_input_list():
- print(json.dumps(item, indent=2))
- """
- $python examples/handoffs/message_filter_streaming.py
- Step 1 done
- Step 2 done
- Step 3 done
- Tu nombre y lugar de residencia no los tengo disponibles. Solo sé que mencionaste vivir en la ciudad de Nueva York.
- Step 4 done
-
- ===Final messages===
-
- {
- "content": "Can you generate a random number between 0 and 100?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "Sure! Here's a random number between 0 and 100: **37**.",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- {
- "content": "I live in New York City. Whats the population of the city?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "As of the latest estimates, New York City's population is approximately 8.5 million people. Would you like more information about the city?",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- {
- "content": "Por favor habla en espa\u00f1ol. \u00bfCu\u00e1l es mi nombre y d\u00f3nde vivo?",
- "role": "user"
- }
- {
- "id": "...",
- "content": [
- {
- "annotations": [],
- "text": "No s\u00e9 tu nombre, pero me dijiste que vives en Nueva York.",
- "type": "output_text"
- }
- ],
- "role": "assistant",
- "status": "completed",
- "type": "message"
- }
- """
-
-
-if __name__ == "__main__":
- import asyncio
-
- asyncio.run(main())
diff --git a/tests/examples/research_bot/README.md b/tests/examples/research_bot/README.md
deleted file mode 100644
index 4060983c..00000000
--- a/tests/examples/research_bot/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Research bot
-
-This is a simple example of a multi-agent research bot. To run it:
-
-```bash
-python -m examples.research_bot.main
-```
-
-## Architecture
-
-The flow is:
-
-1. User enters their research topic
-2. `planner_agent` comes up with a plan to search the web for information. The plan is a list of search queries, with a search term and a reason for each query.
-3. For each search item, we run a `search_agent`, which uses the Web Search tool to search for that term and summarize the results. These all run in parallel (see the sketch after this list).
-4. Finally, the `writer_agent` receives the search summaries, and creates a written report.
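-
-For orientation, here's a minimal sketch of step 3's fan-out, assuming the agents defined in this example (the full `manager.py` version also reports progress and tolerates individual search failures):
-
-```python
-import asyncio
-
-from agents import Runner
-
-from .agents.planner_agent import WebSearchPlan
-from .agents.search_agent import search_agent
-
-
-async def perform_searches(plan: WebSearchPlan) -> list[str]:
-    # One search_agent run per planned query, awaited in parallel.
-    tasks = [
-        Runner.run(search_agent, f"Search term: {item.query}\nReason: {item.reason}")
-        for item in plan.searches
-    ]
-    results = await asyncio.gather(*tasks)
-    return [str(result.final_output) for result in results]
-```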
-
-## Suggested improvements
-
-If you're building your own research bot, some ideas to add to this are:
-
-1. Retrieval: Add support for fetching relevant information from a vector store. You could use the File Search tool for this (a sketch follows this list).
-2. Image and file upload: Allow users to attach PDFs or other files, as baseline context for the research.
-3. More planning and thinking: Models often produce better results given more time to think. Improve the planning process to come up with a better plan, and add an evaluation step so that the model can choose to improve its results, run additional searches, etc.
-4. Code execution: Allow running code, which is useful for data analysis.
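-
-A minimal sketch of idea 1, assuming the SDK's `FileSearchTool` and an existing vector store (the store ID below is a placeholder):
-
-```python
-from agents import Agent, FileSearchTool, WebSearchTool
-
-retrieval_search_agent = Agent(
-    name="Search agent with retrieval",
-    instructions="Search the web and the attached documents, then summarize the results.",
-    tools=[
-        WebSearchTool(),
-        # Placeholder vector store ID; create the store and upload files before running.
-        FileSearchTool(max_num_results=3, vector_store_ids=["vs_..."]),
-    ],
-)
-```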
diff --git a/tests/examples/research_bot/__init__.py b/tests/examples/research_bot/__init__.py
deleted file mode 100644
index 8b137891..00000000
--- a/tests/examples/research_bot/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/examples/research_bot/agents/__init__.py b/tests/examples/research_bot/agents/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/examples/research_bot/agents/planner_agent.py b/tests/examples/research_bot/agents/planner_agent.py
deleted file mode 100644
index e80a8e65..00000000
--- a/tests/examples/research_bot/agents/planner_agent.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from pydantic import BaseModel
-
-from agents import Agent
-
-PROMPT = (
- "You are a helpful research assistant. Given a query, come up with a set of web searches "
- "to perform to best answer the query. Output between 5 and 20 terms to query for."
-)
-
-
-class WebSearchItem(BaseModel):
- reason: str
- "Your reasoning for why this search is important to the query."
-
- query: str
- "The search term to use for the web search."
-
-
-class WebSearchPlan(BaseModel):
- searches: list[WebSearchItem]
- """A list of web searches to perform to best answer the query."""
-
-
-planner_agent = Agent(
- name="PlannerAgent",
- instructions=PROMPT,
- model="gpt-4o",
- output_type=WebSearchPlan,
-)
diff --git a/tests/examples/research_bot/agents/search_agent.py b/tests/examples/research_bot/agents/search_agent.py
deleted file mode 100644
index 72cbc8e1..00000000
--- a/tests/examples/research_bot/agents/search_agent.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from agents import Agent, WebSearchTool
-from agents.model_settings import ModelSettings
-
-INSTRUCTIONS = (
- "You are a research assistant. Given a search term, you search the web for that term and"
- "produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300"
- "words. Capture the main points. Write succintly, no need to have complete sentences or good"
- "grammar. This will be consumed by someone synthesizing a report, so its vital you capture the"
- "essence and ignore any fluff. Do not include any additional commentary other than the summary"
- "itself."
-)
-
-search_agent = Agent(
- name="Search agent",
- instructions=INSTRUCTIONS,
- tools=[WebSearchTool()],
- model_settings=ModelSettings(tool_choice="required"),
-)
diff --git a/tests/examples/research_bot/agents/writer_agent.py b/tests/examples/research_bot/agents/writer_agent.py
deleted file mode 100644
index 7b7d01a2..00000000
--- a/tests/examples/research_bot/agents/writer_agent.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Agent used to synthesize a final report from the individual summaries.
-from pydantic import BaseModel
-
-from agents import Agent
-
-PROMPT = (
- "You are a senior researcher tasked with writing a cohesive report for a research query. "
- "You will be provided with the original query, and some initial research done by a research "
- "assistant.\n"
- "You should first come up with an outline for the report that describes the structure and "
- "flow of the report. Then, generate the report and return that as your final output.\n"
- "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
- "for 5-10 pages of content, at least 1000 words."
-)
-
-
-class ReportData(BaseModel):
- short_summary: str
- """A short 2-3 sentence summary of the findings."""
-
- markdown_report: str
- """The final report"""
-
- follow_up_questions: list[str]
- """Suggested topics to research further"""
-
-
-writer_agent = Agent(
- name="WriterAgent",
- instructions=PROMPT,
- model="o3-mini",
- output_type=ReportData,
-)
diff --git a/tests/examples/research_bot/main.py b/tests/examples/research_bot/main.py
deleted file mode 100644
index a0fd43dc..00000000
--- a/tests/examples/research_bot/main.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import asyncio
-
-from .manager import ResearchManager
-
-
-async def main() -> None:
- query = input("What would you like to research? ")
- await ResearchManager().run(query)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/research_bot/manager.py b/tests/examples/research_bot/manager.py
deleted file mode 100644
index 47306f14..00000000
--- a/tests/examples/research_bot/manager.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import time
-
-from rich.console import Console
-
-from agents import Runner, custom_span, gen_trace_id, trace
-
-from .agents.planner_agent import WebSearchItem, WebSearchPlan, planner_agent
-from .agents.search_agent import search_agent
-from .agents.writer_agent import ReportData, writer_agent
-from .printer import Printer
-
-
-class ResearchManager:
- def __init__(self):
- self.console = Console()
- self.printer = Printer(self.console)
-
- async def run(self, query: str) -> None:
- trace_id = gen_trace_id()
- with trace("Research trace", trace_id=trace_id):
- self.printer.update_item(
- "trace_id",
- f"View trace: https://platform.openai.com/traces/{trace_id}",
- is_done=True,
- hide_checkmark=True,
- )
-
- self.printer.update_item(
- "starting",
- "Starting research...",
- is_done=True,
- hide_checkmark=True,
- )
- search_plan = await self._plan_searches(query)
- search_results = await self._perform_searches(search_plan)
- report = await self._write_report(query, search_results)
-
- final_report = f"Report summary\n\n{report.short_summary}"
- self.printer.update_item("final_report", final_report, is_done=True)
-
- self.printer.end()
-
- print("\n\n=====REPORT=====\n\n")
- print(f"Report: {report.markdown_report}")
- print("\n\n=====FOLLOW UP QUESTIONS=====\n\n")
- follow_up_questions = "\n".join(report.follow_up_questions)
- print(f"Follow up questions: {follow_up_questions}")
-
- async def _plan_searches(self, query: str) -> WebSearchPlan:
- self.printer.update_item("planning", "Planning searches...")
- result = await Runner.run(
- planner_agent,
- f"Query: {query}",
- )
- self.printer.update_item(
- "planning",
- f"Will perform {len(result.final_output.searches)} searches",
- is_done=True,
- )
- return result.final_output_as(WebSearchPlan)
-
- async def _perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
- with custom_span("Search the web"):
- self.printer.update_item("searching", "Searching...")
- num_completed = 0
- tasks = [asyncio.create_task(self._search(item)) for item in search_plan.searches]
- results = []
- for task in asyncio.as_completed(tasks):
- result = await task
- if result is not None:
- results.append(result)
- num_completed += 1
- self.printer.update_item(
- "searching", f"Searching... {num_completed}/{len(tasks)} completed"
- )
- self.printer.mark_item_done("searching")
- return results
-
- async def _search(self, item: WebSearchItem) -> str | None:
- input = f"Search term: {item.query}\nReason for searching: {item.reason}"
- try:
- result = await Runner.run(
- search_agent,
- input,
- )
- return str(result.final_output)
- except Exception:
- return None
-
- async def _write_report(self, query: str, search_results: list[str]) -> ReportData:
- self.printer.update_item("writing", "Thinking about report...")
- input = f"Original query: {query}\nSummarized search results: {search_results}"
- result = Runner.run_streamed(
- writer_agent,
- input,
- )
- update_messages = [
- "Thinking about report...",
- "Planning report structure...",
- "Writing outline...",
- "Creating sections...",
- "Cleaning up formatting...",
- "Finalizing report...",
- "Finishing report...",
- ]
-
- last_update = time.time()
- next_message = 0
- async for _ in result.stream_events():
- if time.time() - last_update > 5 and next_message < len(update_messages):
- self.printer.update_item("writing", update_messages[next_message])
- next_message += 1
- last_update = time.time()
-
- self.printer.mark_item_done("writing")
- return result.final_output_as(ReportData)
diff --git a/tests/examples/research_bot/printer.py b/tests/examples/research_bot/printer.py
deleted file mode 100644
index e820c753..00000000
--- a/tests/examples/research_bot/printer.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from typing import Any
-
-from rich.console import Console, Group
-from rich.live import Live
-from rich.spinner import Spinner
-
-
-class Printer:
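-    """Live console status display: a spinner for each in-progress item, a checkmark once done."""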
- def __init__(self, console: Console):
- self.live = Live(console=console)
- self.items: dict[str, tuple[str, bool]] = {}
- self.hide_done_ids: set[str] = set()
- self.live.start()
-
- def end(self) -> None:
- self.live.stop()
-
- def hide_done_checkmark(self, item_id: str) -> None:
- self.hide_done_ids.add(item_id)
-
- def update_item(
- self, item_id: str, content: str, is_done: bool = False, hide_checkmark: bool = False
- ) -> None:
- self.items[item_id] = (content, is_done)
- if hide_checkmark:
- self.hide_done_ids.add(item_id)
- self.flush()
-
- def mark_item_done(self, item_id: str) -> None:
- self.items[item_id] = (self.items[item_id][0], True)
- self.flush()
-
- def flush(self) -> None:
- renderables: list[Any] = []
- for item_id, (content, is_done) in self.items.items():
- if is_done:
- prefix = "✅ " if item_id not in self.hide_done_ids else ""
- renderables.append(prefix + content)
- else:
- renderables.append(Spinner("dots", text=content))
- self.live.update(Group(*renderables))
diff --git a/tests/examples/research_bot/sample_outputs/product_recs.md b/tests/examples/research_bot/sample_outputs/product_recs.md
deleted file mode 100644
index 70789eb3..00000000
--- a/tests/examples/research_bot/sample_outputs/product_recs.md
+++ /dev/null
@@ -1,180 +0,0 @@
-# Comprehensive Guide on Best Surfboards for Beginners: Transitioning, Features, and Budget Options
-
-Surfing is not only a sport but a lifestyle that hooks its enthusiasts with the allure of riding waves and connecting with nature. For beginners, selecting the right surfboard is critical to safety, learning, and performance. This comprehensive guide has been crafted to walk through the essential aspects of choosing the ideal surfboard for beginners, especially those looking to transition from an 11-foot longboard to a shorter, more dynamic board. We discuss various board types, materials, design elements, and budget ranges, providing a detailed road map for both new surfers and those in the process of progression.
-
----
-
-## Table of Contents
-
-1. [Introduction](#introduction)
-2. [Board Types and Design Considerations](#board-types-and-design-considerations)
-3. [Key Board Dimensions and Features](#key-board-dimensions-and-features)
-4. [Materials: Soft-Top vs. Hard-Top Boards](#materials-soft-top-vs-hard-top-boards)
-5. [Tips for Transitioning from Longboards to Shorter Boards](#tips-for-transitioning-from-longboards-to-shorter-boards)
-6. [Budget and Pricing Options](#budget-and-pricing-options)
-7. [Recommended Models and Buying Options](#recommended-models-and-buying-options)
-8. [Conclusion](#conclusion)
-9. [Follow-up Questions](#follow-up-questions)
-
----
-
-## Introduction
-
-Surfing is a dynamic sport that requires not only skill and technique but also the proper equipment. For beginners, the right surfboard can make the difference between a frustrating experience and one that builds confidence and enthusiasm. Many newcomers start with longboards due to their stability and ease of paddling; however, as skills develop, transitioning to a shorter board might be desirable for enhancing maneuverability and performance. This guide is designed for surfers who can already catch waves on an 11-foot board and are now considering stepping down to a more versatile option.
-
-The overarching goal of this document is to help beginners identify which surfboard characteristics are most important, including board length, width, thickness, volume, and materials, while also considering factors like weight distribution, buoyancy, and control. We will also take a look at board types that are particularly welcoming for beginners and discuss gradual transitioning strategies.
-
----
-
-## Board Types and Design Considerations
-
-Choosing a board involves understanding the variety of designs available. Below are the main types of surfboards that cater to beginners and transitional surfers:
-
-### Longboards and Mini-Mals
-
-Longboards, typically 8 to 11 feet in length, provide ample stability, smoother paddling, and are well-suited for wave-catching. Their generous volume and width allow beginners to build confidence when standing up and riding waves. Mini-mal or mini-malibus (often around 8 to 9 feet) are a popular bridge between the longboard and the more agile shortboard, offering both stability and moderate maneuverability, which makes them excellent for gradual progress.
-
-### Funboards and Hybrids
-
-Funboards and hybrid boards blend the benefits of longboards and shortboards. They typically range from 6’6" to 8’0" in length, with extra volume and width that help preserve stability while introducing elements of sharper turning and improved agility. Hybrids are particularly helpful for surfers transitioning from longboards, as they maintain some of the buoyancy and ease of catching waves, yet offer a taste of the performance found in smaller boards.
-
-### Shortboards
-
-Shortboards emphasize performance, maneuverability, and a more responsive ride. However, they have less volume and require stronger paddling, quicker pop-up techniques, and more refined balance. For beginners, moving to a traditional shortboard immediately can be challenging. It is generally advised to make a gradual transition, potentially starting with a funboard or hybrid before making a direct leap to a performance shortboard.
-
----
-
-## Key Board Dimensions and Features
-
-When selecting a beginner surfboard, several key dimensions and features drastically affect performance, ease of learning, and safety:
-
-### Length and Width
-
-- **Length**: Starting with an 8 to 9-foot board is ideal. Longer boards offer enhanced stability and improved paddling capabilities. Gradual downsizing is recommended if you plan to move from an 11-foot board.
-- **Width**: A board with a width over 20 inches provides greater stability and facilitates balance, especially vital for beginners.
-
-### Thickness and Volume
-
-- **Thickness**: Typically around 2.5 to 3 inches. Thicker decks increase buoyancy, allowing the surfer to paddle easier while catching waves.
-- **Volume**: Measured in liters, volume is critical in understanding a board's flotation capacity. Higher volumes (e.g., 60-100 liters) are essential for beginners as they make the board more forgiving and stable. Suitable volumes might vary according to the surfer’s weight and experience level.
-
-### Nose and Tail Shape
-
-- **Nose Shape**: A wide, rounded nose expands the board’s planing surface, which can help in catching waves sooner and maintaining stability as you ride.
-- **Tail Design**: Square or rounded tails are generally recommended as they enhance stability and allow for controlled turns, essential during the learning phase.
-
-### Rocker
-
-- **Rocker**: This is the curvature of the board from nose to tail. For beginners, a minimal or relaxed rocker provides better stability and ease during paddling. A steeper rocker might be introduced progressively as the surfer’s skills improve.
-
----
-
-## Materials: Soft-Top vs. Hard-Top Boards
-
-The material composition of a surfboard is a crucial factor in determining its performance, durability, and safety. Beginners have two primary choices:
-
-### Soft-Top (Foam) Boards
-
-Soft-top boards are constructed almost entirely from foam. Their attributes include:
-
-- **Safety and Forgiveness**: The foam construction minimizes injury upon impact which is advantageous for beginners who might fall frequently.
-- **Stability and Buoyancy**: These boards typically offer greater buoyancy due to their softer material and thicker construction, easing the initial learning process.
-- **Maintenance**: They often require less maintenance—there is typically no need for waxing and they are more resistant to dings and scratches.
-
-However, as a surfer’s skills progress, a soft-top might limit maneuverability and overall performance.
-
-### Hard-Top Boards
-
-Hard-tops, in contrast, offer a more traditional surfboard feel. They generally rely on a foam core encased in resin, with two prevalent combinations:
-
-- **PU (Polyurethane) Core with Polyester Resin**: This combination gives a classic feel and is relatively economical; however, these boards can be heavier and, as they age, more prone to damage.
-- **EPS (Expanded Polystyrene) Core with Epoxy Resin**: Lightweight and durable, EPS boards are often more buoyant and resistant to damage, although they usually carry a higher price tag and may be less forgiving.
-
-Deciding between soft-top and hard-top boards often depends on a beginner’s progression goals, overall comfort, and budget constraints.
-
----
-
-## Tips for Transitioning from Longboards to Shorter Boards
-
-For surfers who have mastered the basics on an 11-foot board, the transition to a shorter board requires careful consideration, patience, and incremental changes. Here are some key tips:
-
-### Gradual Downsizing
-
-Experts recommend reducing the board length gradually—by about a foot at a time—to allow the body to adjust slowly to a board with less buoyancy and more responsiveness. This process helps maintain wave-catching ability and reduces the shock of transitioning to a very different board feel.
-
-### Strengthening Core Skills
-
-Before transitioning, make sure your surfing fundamentals are solid. Focus on practicing:
-
-- **Steep Take-offs**: Ensure that your pop-up is swift and robust to keep pace with shorter boards that demand a rapid transition from paddling to standing.
-- **Angling and Paddling Techniques**: Learn to angle your takeoffs properly to compensate for the lower buoyancy and increased maneuverability of shorter boards.
-
-### Experimenting with Rentals or Borrowed Boards
-
-If possible, try out a friend’s shorter board or rent one for a day to experience firsthand the differences in performance. This practical trial can provide valuable insights and inform your decision before making a purchase.
-
----
-
-## Budget and Pricing Options
-
-Surfboards are available across a range of prices to match different budgets. Whether you are looking for an affordable beginner board or a more expensive model that grows with your skills, it’s important to understand what features you can expect at different price points.
-
-### Budget-Friendly Options
-
-For those on a tight budget, several entry-level models offer excellent value. Examples include:
-
-- **Wavestorm 8' Classic Pinline Surfboard**: Priced affordably, this board is popular for its ease of use, ample volume, and forgiving nature. Despite its low cost, it delivers the stability needed to get started.
-- **Liquid Shredder EZ Slider Foamie**: A smaller board catering to younger or lighter surfers, this budget option provides easy paddling and a minimal risk of injury due to its soft construction.
-
-### Moderate Price Range
-
-As you move into the intermediate range, boards typically become slightly more specialized in their design, offering features such as improved stringer systems or versatile fin setups. These are excellent for surfers who wish to continue progressing their skills without compromising stability. Many surfboard packages from retailers also bundle a board with essential accessories like board bags, leashes, and wax for additional savings.
-
-### Higher-End Models and Transitional Packages
-
-For surfers looking for durability, performance, and advanced design features, investing in an EPS/epoxy board might be ideal. Although they come at a premium, these boards are lightweight, strong, and customizable with various fin configurations. Some options include boards from brands like South Bay Board Co. and ISLE, which combine high-quality construction with beginner-friendly features that help mediate the transition from longboard to shortboard performance.
-
----
-
-## Recommended Models and Buying Options
-
-Based on extensive research and community recommendations, here are some standout models and tips on where to buy:
-
-### Recommended Models
-
-- **South Bay Board Co. 8'8" Heritage**: Combining foam and resin construction, this board is ideal for beginners who need stability and a forgiving surface. Its 86-liter volume suits both lightweight and somewhat heavier surfers.
-- **Rock-It 8' Big Softy**: With a high volume and an easy paddling profile, this board is designed for beginners, offering ample buoyancy to smooth out the learning curve.
-- **Wave Bandit EZ Rider Series**: Available in multiple lengths (7', 8', 9'), these boards offer versatility, with construction features that balance the stability of longboards and the agility required for shorter boards.
-- **Hybrid/Funboards Like the Poacher Funboard**: Perfect for transitioning surfers, these boards blend the ease of catching waves with the capability for more dynamic maneuvers.
-
-### Buying Options
-
-- **Surf Shops and Local Retailers**: Traditional surf shops allow you to test different boards, which is ideal for assessing the board feel and condition—especially if you are considering a used board.
-- **Online Retailers and Marketplaces**: Websites like Evo, Surfboards Direct, and even local online marketplaces like Craigslist and Facebook Marketplace provide options that range from new to gently used boards. Always inspect reviews and verify seller policies before purchase.
-- **Package Deals and Bundles**: Many retailers offer bundled packages that include not just the board, but also essentials like a leash, wax, fins, and board bags. These packages can be more cost-effective and are great for beginners who need a complete surf kit.
-
----
-
-## Conclusion
-
-Selecting the right surfboard as a beginner is about balancing various factors: stability, buoyancy, maneuverability, and budget.
-
-For those who have honed the basics using an 11-foot longboard, the transition to a shorter board should be gradual. Start by focusing on boards that preserve stability—such as funboards and hybrids—before moving to the more performance-oriented shortboards. Key characteristics like board length, width, thickness, volume, and material profoundly influence your surfing experience. Soft-top boards provide a forgiving entry point, while hard-top boards, especially those with EPS cores and epoxy resin, offer benefits for more advanced progression despite the increased learning curve.
-
-Emphasizing fundamentals like proper pop-up technique and effective paddle work will ease the transition and ensure that the new board complements your evolving skills. Additionally, understanding the pricing spectrum—from budget-friendly models to premium options—allows you to make an informed purchase that suits both your financial and performance needs.
-
-With a thoughtful approach to board selection, you can enhance your learning curve, enjoy safer sessions in the water, and ultimately develop the skills necessary to master the diverse challenges surfing presents. Whether your goal is to ride gentle waves or eventually experiment with sharper turns and dynamic maneuvers, choosing the right board is your first step towards a rewarding and sustainable surfing journey.
-
----
-
-## Follow-up Questions
-
-1. What is your current budget range for a new surfboard, or are you considering buying used?
-2. How frequently do you plan to surf, and in what type of wave conditions?
-3. Are you interested in a board that you can grow into as your skills progress, or do you prefer one that is more specialized for certain conditions?
-4. Would you be interested in additional equipment bundles (like fins, leashes, board bags) offered by local retailers or online shops?
-5. Have you had the opportunity to test ride any boards before, and what feedback did you gather from that experience?
-
----
-
-With this detailed guide, beginners should now have a comprehensive understanding of the surfboard market and the key factors influencing board performance, safety, and ease of progression. Happy surfing, and may you find the perfect board that rides the waves as beautifully as your passion for the sport!
diff --git a/tests/examples/research_bot/sample_outputs/product_recs.txt b/tests/examples/research_bot/sample_outputs/product_recs.txt
deleted file mode 100644
index 78865f23..00000000
--- a/tests/examples/research_bot/sample_outputs/product_recs.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Terminal output for a product recommendation related query. See product_recs.md for final report.
-
-$ uv run python -m examples.research_bot.main
-
-What would you like to research? Best surfboards for beginners. I can catch my own waves, but previously used an 11ft board. What should I look for, what are my options? Various budget ranges.
-View trace: https://platform.openai.com/traces/trace_...
-Starting research...
-✅ Will perform 15 searches
-✅ Searching... 15/15 completed
-✅ Finishing report...
-✅ Report summary
-
-This report provides a detailed guide on selecting the best surfboards for beginners, especially for those transitioning from an 11-foot longboard to a
-shorter board. It covers design considerations such as board dimensions, shape, materials, and volume, while comparing soft-top and hard-top boards. In
-addition, the report discusses various budget ranges, recommended board models, buying options (both new and used), and techniques to ease the transition to
-more maneuverable boards. By understanding these factors, beginner surfers can select a board that not only enhances their skills but also suits their
-individual needs.
-
-
-=====REPORT=====
-
-
-Report: # Comprehensive Guide on Best Surfboards for Beginners: Transitioning, Features, and Budget Options
-
-Surfing is not only a sport but a lifestyle that hooks its enthusiasts with the allure of riding waves and connecting with nature. For beginners, selecting the right surfboard is critical to safety, learning, and performance. This comprehensive guide has been crafted to walk through the essential aspects of choosing the ideal surfboard for beginners, especially those looking to transition from an 11-foot longboard to a shorter, more dynamic board. We discuss various board types, materials, design elements, and budget ranges, providing a detailed road map for both new surfers and those in the process of progression.
-
----
-
-## Table of Contents
-
-1. [Introduction](#introduction)
-2. [Board Types and Design Considerations](#board-types-and-design-considerations)
-3. [Key Board Dimensions and Features](#key-board-dimensions-and-features)
-4. [Materials: Soft-Top vs. Hard-Top Boards](#materials-soft-top-vs-hard-top-boards)
-5. [Tips for Transitioning from Longboards to Shorter Boards](#tips-for-transitioning-from-longboards-to-shorter-boards)
-6. [Budget and Pricing Options](#budget-and-pricing-options)
-7. [Recommended Models and Buying Options](#recommended-models-and-buying-options)
-8. [Conclusion](#conclusion)
-9. [Follow-up Questions](#follow-up-questions)
-
----
-
-## Introduction
-
-Surfing is a dynamic sport that requires not only skill and technique but also the proper equipment. For beginners, the right surfboard can make the difference between a frustrating experience and one that builds confidence and enthusiasm. Many newcomers start with longboards due to their stability and ease of paddling; however, as skills develop, transitioning to a shorter board might be desirable for enhancing maneuverability and performance. This guide is designed for surfers who can already catch waves on an 11-foot board and are now considering stepping down to a more versatile option.
-
-The overarching goal of this document is to help beginners identify which surfboard characteristics are most important, including board length, width, thickness, volume, and materials, while also considering factors like weight distribution, buoyancy, and control. We will also take a look at board types that are particularly welcoming for beginners and discuss gradual transitioning strategies.
-
----
-
-## Board Types and Design Considerations
-
-Choosing a board involves understanding the variety of designs available. Below are the main types of surfboards that cater to beginners and transitional surfers:
-
-### Longboards and Mini-Mals
-
-Longboards, typically 8 to 11 feet in length, provide ample stability, smoother paddling, and are well-suited for wave-catching. Their generous volume and width allow beginners to build confidence when standing up and riding waves. Mini-mal or mini-malibus (often around 8 to 9 feet) are a popular bridge between the longboard and the more agile shortboard, offering both stability and moderate maneuverability, which makes them excellent for gradual progress.
-
-### Funboards and Hybrids
-
-Funboards and hybrid boards blend the benefits of longboards and shortboards. They typically range from 6’6" to 8’0" in length, with extra volume and width that help preserve stability while introducing elements of sharper turning and improved agility. Hybrids are particularly helpful for surfers transitioning from longboards, as they maintain some of the buoyancy and ease of catching waves, yet offer a taste of the performance found in smaller boards.
-
-### Shortboards
-
-Shortboards emphasize performance, maneuverability, and a more responsive ride. However, they have less volume and require stronger paddling, quicker pop-up techniques, and more refined balance. For beginners, moving to a traditional shortboard immediately can be challenging. It is generally advised to make a gradual transition, potentially starting with a funboard or hybrid before making a direct leap to a performance shortboard.
-
----
-
-## Key Board Dimensions and Features
-
-When selecting a beginner surfboard, several key dimensions and features drastically affect performance, ease of learning, and safety:
-
-### Length and Width
-
-- **Length**: Starting with an 8 to 9-foot board is ideal. Longer boards offer enhanced stability and improved paddling capabilities. Gradual downsizing is recommended if you plan to move from an 11-foot board.
-- **Width**: A board with a width over 20 inches provides greater stability and facilitates balance, especially vital for beginners.
-
-### Thickness and Volume
-
-- **Thickness**: Typically around 2.5 to 3 inches. Thicker decks increase buoyancy, allowing the surfer to paddle easier while catching waves.
-- **Volume**: Measured in liters, volume is critical in understanding a board's flotation capacity. Higher volumes (e.g., 60-100 liters) are essential for beginners as they make the board more forgiving and stable. Suitable volumes might vary according to the surfer’s weight and experience level (a rough sizing sketch follows below).
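-
-As a rough illustration of how volume guidance translates into numbers, here is a minimal Python sketch; the liters-per-kilogram ratios are assumed example values for illustration, not figures taken from this report:
-
-```python
-# Illustrative only: sizing guides commonly scale board volume with body weight,
-# with beginners riding a higher liters-per-kg ratio than advanced surfers.
-RATIOS = {"beginner": 0.8, "intermediate": 0.55, "advanced": 0.4}  # assumed example ratios
-
-def suggested_volume_liters(weight_kg: float, level: str = "beginner") -> float:
-    return round(weight_kg * RATIOS[level], 1)
-
-print(suggested_volume_liters(75))  # -> 60.0 liters for a 75 kg beginner
-```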
-
-### Nose and Tail Shape
-
-- **Nose Shape**: A wide, rounded nose expands the board’s planing surface, which can help in catching waves sooner and maintaining stability as you ride.
-- **Tail Design**: Square or rounded tails are generally recommended as they enhance stability and allow for controlled turns, essential during the learning phase.
-
-### Rocker
-
-- **Rocker**: This is the curvature of the board from nose to tail. For beginners, a minimal or relaxed rocker provides better stability and ease during paddling. A steeper rocker might be introduced progressively as the surfer’s skills improve.
-
----
-
-## Materials: Soft-Top vs. Hard-Top Boards
-
-The material composition of a surfboard is a crucial factor in determining its performance, durability, and safety. Beginners have two primary choices:
-
-### Soft-Top (Foam) Boards
-
-Soft-top boards are constructed almost entirely from foam. Their attributes include:
-
-- **Safety and Forgiveness**: The foam construction minimizes the risk of injury on impact, which is advantageous for beginners who might fall frequently.
-- **Stability and Buoyancy**: These boards typically offer greater buoyancy due to their softer material and thicker construction, easing the initial learning process.
-- **Maintenance**: They often require less maintenance—there is typically no need for waxing and they are more resistant to dings and scratches.
-
-However, as a surfer’s skills progress, a soft-top might limit maneuverability and overall performance.
-
-### Hard-Top Boards
-
-Hard-tops, in contrast, offer a more traditional surfboard feel. They generally rely on a foam core encased in resin, with two prevalent combinations:
-
-- **PU (Polyurethane) Core with Polyester Resin**: This combination gives a classic feel and is relatively economical; however, these boards can be heavier and, as they age, more prone to damage.
-- **EPS (Expanded Polystyrene) Core with Epoxy Resin**: Lightweight and durable, EPS boards are often more buoyant and resistant to damage, although they usually carry a higher price tag and may be less forgiving.
-
-Deciding between soft-top and hard-top boards often depends on a beginner’s progression goals, overall comfort, and budget constraints.
-
----
-
-## Tips for Transitioning from Longboards to Shorter Boards
-
-For surfers who have mastered the basics on an 11-foot board, the transition to a shorter board requires careful consideration, patience, and incremental changes. Here are some key tips:
-
-### Gradual Downsizing
-
-Experts recommend reducing the board length gradually—by about a foot at a time—to allow the body to adjust slowly to a board with less buoyancy and more responsiveness. This process helps maintain wave-catching ability and reduces the shock of transitioning to a very different board feel.
-
-### Strengthening Core Skills
-
-Before transitioning, make sure your surfing fundamentals are solid. Focus on practicing:
-
-- **Steep Take-offs**: Ensure that your pop-up is swift and robust to keep pace with shorter boards that demand a rapid transition from paddling to standing.
-- **Angling and Paddling Techniques**: Learn to angle your takeoffs properly to compensate for the lower buoyancy and increased maneuverability of shorter boards.
-
-### Experimenting with Rentals or Borrowed Boards
-
-If possible, try out a friend’s shorter board or rent one for a day to experience firsthand the differences in performance. This practical trial can provide valuable insights and inform your decision before making a purchase.
-
----
-
-## Budget and Pricing Options
-
-Surfboards are available across a range of prices to match different budgets. Whether you are looking for an affordable beginner board or a more expensive model that grows with your skills, it’s important to understand what features you can expect at different price points.
-
-### Budget-Friendly Options
-
-For those on a tight budget, several entry-level models offer excellent value. Examples include:
-
-- **Wavestorm 8' Classic Pinline Surfboard**: Priced affordably, this board is popular for its ease of use, ample volume, and forgiving nature. Despite its low cost, it delivers the stability needed to get started.
-- **Liquid Shredder EZ Slider Foamie**: A smaller board catering to younger or lighter surfers, this budget option provides easy paddling and a minimal risk of injury due to its soft construction.
-
-### Moderate Price Range
-
-As you move into the intermediate range, boards typically become slightly more specialized in their design, offering features such as improved stringer systems or versatile fin setups. These are excellent for surfers who wish to continue progressing their skills without compromising stability. Many surfboard packages from retailers also bundle a board with essential accessories like board bags, leashes, and wax for additional savings.
-
-### Higher-End Models and Transitional Packages
-
-For surfers looking for durability, performance, and advanced design features, investing in an EPS/epoxy board might be ideal. Although they come at a premium, these boards are lightweight, strong, and customizable with various fin configurations. Some options include boards from brands like South Bay Board Co. and ISLE, which combine high-quality construction with beginner-friendly features that help mediate the transition from longboard to shortboard performance.
-
----
-
-## Recommended Models and Buying Options
-
-Based on extensive research and community recommendations, here are some standout models and tips on where to buy:
-
-### Recommended Models
-
-- **South Bay Board Co. 8'8" Heritage**: Combining foam and resin construction, this board is ideal for beginners who need stability and a forgiving surface. Its 86-liter volume suits both lightweight and somewhat heavier surfers.
-- **Rock-It 8' Big Softy**: With a high volume and an easy paddling profile, this board is designed for beginners, offering ample buoyancy to smooth out the learning curve.
-- **Wave Bandit EZ Rider Series**: Available in multiple lengths (7', 8', 9'), these boards offer versatility, with construction features that balance the stability of longboards and the agility required for shorter boards.
-- **Hybrid/Funboards Like the Poacher Funboard**: Perfect for transitioning surfers, these boards blend the ease of catching waves with the capability for more dynamic maneuvers.
-
-### Buying Options
-
-- **Surf Shops and Local Retailers**: Traditional surf shops allow you to test different boards, which is ideal for assessing the board feel and condition—especially if you are considering a used board.
-- **Online Retailers and Marketplaces**: Websites like Evo and Surfboards Direct, as well as online marketplaces like Craigslist and Facebook Marketplace, provide options that range from new to gently used boards. Always inspect reviews and verify seller policies before purchase.
-- **Package Deals and Bundles**: Many retailers offer bundled packages that include not just the board, but also essentials like a leash, wax, fins, and board bags. These packages can be more cost-effective and are great for beginners who need a complete surf kit.
-
----
-
-## Conclusion
-
-Selecting the right surfboard as a beginner is about balancing various factors: stability, buoyancy, maneuverability, and budget.
-
-For those who have honed the basics using an 11-foot longboard, the transition to a shorter board should be gradual. Start by focusing on boards that preserve stability—such as funboards and hybrids—before moving to the more performance-oriented shortboards. Key characteristics like board length, width, thickness, volume, and material profoundly influence your surfing experience. Soft-top boards provide a forgiving entry point, while hard-top boards, especially those with EPS cores and epoxy resin, offer benefits for more advanced progression despite the increased learning curve.
-
-Emphasizing fundamentals like proper pop-up technique and effective paddle work will ease the transition and ensure that the new board complements your evolving skills. Additionally, understanding the pricing spectrum—from budget-friendly models to premium options—allows you to make an informed purchase that suits both your financial and performance needs.
-
-With a thoughtful approach to board selection, you can enhance your learning curve, enjoy safer sessions in the water, and ultimately develop the skills necessary to master the diverse challenges surfing presents. Whether your goal is to ride gentle waves or eventually experiment with sharper turns and dynamic maneuvers, choosing the right board is your first step towards a rewarding and sustainable surfing journey.
-
----
-
-## Follow-up Questions
-
-1. What is your current budget range for a new surfboard, or are you considering buying used?
-2. How frequently do you plan to surf, and in what type of wave conditions?
-3. Are you interested in a board that you can grow into as your skills progress, or do you prefer one that is more specialized for certain conditions?
-4. Would you be interested in additional equipment bundles (like fins, leashes, board bags) offered by local retailers or online shops?
-5. Have you had the opportunity to test ride any boards before, and what feedback did you gather from that experience?
-
----
-
-With this detailed guide, beginners should now have a comprehensive understanding of the surfboard market and the key factors influencing board performance, safety, and ease of progression. Happy surfing, and may you find the perfect board that rides the waves as beautifully as your passion for the sport!
-
-
-=====FOLLOW UP QUESTIONS=====
-
-
-Follow up questions: What is your current budget range for a new surfboard, or are you considering a used board?
-What types of waves do you typically surf, and how might that affect your board choice?
-Would you be interested in a transitional board that grows with your skills, or are you looking for a more specialized design?
-Have you had experience with renting or borrowing boards to try different sizes before making a purchase?
-Do you require additional equipment bundles (like fins, leash, or wax), or do you already have those?
diff --git a/tests/examples/research_bot/sample_outputs/vacation.md b/tests/examples/research_bot/sample_outputs/vacation.md
deleted file mode 100644
index 82c137af..00000000
--- a/tests/examples/research_bot/sample_outputs/vacation.md
+++ /dev/null
@@ -1,177 +0,0 @@
-Report: # Caribbean Adventure in April: Surfing, Hiking, and Water Sports Exploration
-
-The Caribbean is renowned for its crystal-clear waters, vibrant culture, and diverse outdoor activities. April is an especially attractive month for visitors: warm temperatures, clear skies, and the promise of abundant activities. This report explores the best Caribbean destinations in April, with a focus on optimizing your vacation for surfing, hiking, and water sports.
-
----
-
-## Table of Contents
-
-1. [Introduction](#introduction)
-2. [Why April is the Perfect Time in the Caribbean](#why-april-is-the-perfect-time-in-the-caribbean)
-3. [Surfing in the Caribbean](#surfing-in-the-caribbean)
- - 3.1 [Barbados: The Tale of Two Coasts](#barbados-the-tale-of-two-coasts)
- - 3.2 [Puerto Rico: Rincón and Beyond](#puerto-rico-rinc%C3%B3n-and-beyond)
- - 3.3 [Dominican Republic and Other Hotspots](#dominican-republic-and-other-hotspots)
-4. [Hiking Adventures Across the Caribbean](#hiking-adventures-across-the-caribbean)
- - 4.1 [Trekking Through Tropical Rainforests](#trekking-through-tropical-rainforests)
- - 4.2 [Volcanic Peaks and Rugged Landscapes](#volcanic-peaks-and-rugged-landscapes)
-5. [Diverse Water Sports Experiences](#diverse-water-sports-experiences)
- - 5.1 [Snorkeling, Diving, and Jet Skiing](#snorkeling-diving-and-jet-skiing)
- - 5.2 [Kiteboarding and Windsurfing](#kiteboarding-and-windsurfing)
-6. [Combining Adventures: Multi-Activity Destinations](#combining-adventures-multi-activity-destinations)
-7. [Practical Advice and Travel Tips](#practical-advice-and-travel-tips)
-8. [Conclusion](#conclusion)
-
----
-
-## Introduction
-
-Caribbean vacations are much more than just beach relaxation; they offer adventure, exploration, and a lively cultural tapestry waiting to be discovered. For travelers seeking an adrenaline-filled getaway, April provides optimal conditions. This report synthesizes diverse research findings and travel insights to help you create an itinerary that combines the thrill of surfing, the challenge of hiking, and the excitement of water sports.
-
-Whether you're standing on the edge of a powerful reef break or trekking through lush tropical landscapes, the Caribbean in April invites you to dive into nature, adventure, and culture. The following sections break down the best destinations and activities, ensuring that every aspect of your trip is meticulously planned for an unforgettable experience.
-
----
-
-## Why April is the Perfect Time in the Caribbean
-
-April stands at the crossroads of seasons in many Caribbean destinations. It marks the tail end of the dry season, ensuring:
-
-- **Consistent Warm Temperatures:** Average daytime highs around 29°C (84°F) foster comfortable conditions for both land and water activities.
-- **Pleasant Sea Temperatures:** With sea temperatures near 26°C (79°F), swimmers, surfers, and divers are treated to inviting waters.
-- **Clear Skies and Minimal Rainfall:** Crisp, blue skies make for excellent visibility during snorkeling and diving, as well as clear panoramic views while hiking.
-- **Festivals and Cultural Events:** Many islands host seasonal festivals such as Barbados' Fish Festival and Antigua's Sailing Week, adding a cultural layer to your vacation.
-
-These factors create an ideal backdrop for balancing your outdoor pursuits, whether you’re catching epic waves, trekking rugged trails, or partaking in water sports.
-
----
-
-## Surfing in the Caribbean
-
-Surfing in the Caribbean offers diverse wave experiences, ranging from gentle, beginner-friendly rollers to powerful reef breaks that challenge even seasoned surfers. April, in particular, provides excellent conditions for those looking to ride its picturesque waves.
-
-### Barbados: The Tale of Two Coasts
-
-Barbados is a prime destination:
-
-- **Soup Bowl in Bathsheba:** On the east coast, the Soup Bowl is famous for its consistent, powerful waves. This spot attracts experienced surfers who appreciate its challenging right-hand reef break with steep drops, providing the kind of performance wave rarely found elsewhere.
-- **Freights Bay:** On the south coast, visitors find more forgiving, gentle wave conditions. Ideal for beginners and longboarders, this spot offers the perfect balance for those still mastering their craft.
-
-Barbados not only excels in its surfing credentials but also complements the experience with a rich local culture and events in April, making it a well-rounded destination.
-
-### Puerto Rico: Rincón and Beyond
-
-Rincón in Puerto Rico is hailed as the Caribbean’s surfing capital:
-
-- **Diverse Breaks:** With spots ranging from challenging reef breaks such as Tres Palmas and Dogman's to more inviting waves at Domes and Maria's, Puerto Rico offers a spectrum for all surfing skill levels.
-- **Local Culture:** Aside from its surf culture, the island boasts vibrant local food scenes, historic sites, and exciting nightlife, enriching your overall travel experience.
-
-In addition, Puerto Rico’s coasts often feature opportunities for hiking and other outdoor adventures, making it an attractive option for multi-activity travelers.
-
-### Dominican Republic and Other Hotspots
-
-Other islands such as the Dominican Republic, with Playa Encuentro on its north coast, provide consistent surf year-round. Highlights include:
-
-- **Playa Encuentro:** A hotspot known for its dependable breaks, ideal for both intermediate and advanced surfers during the cooler months of October to April.
-- **Jamaica and The Bahamas:** Jamaica’s Boston Bay offers a mix of beginner and intermediate waves, and The Bahamas’ Surfer’s Beach on Eleuthera draws parallels to the legendary surf spots of Hawaii, especially during the winter months.
-
-These destinations not only spotlight surfing but also serve as gateways to additional outdoor activities, ensuring there's never a dull moment whether you're balancing waves with hikes or cultural exploration.
-
----
-
-## Hiking Adventures Across the Caribbean
-
-The Caribbean's topography is as varied as it is beautiful. Its network of hiking trails traverses volcanic peaks, ancient rainforests, and dramatic coastal cliffs, offering breathtaking vistas to intrepid explorers.
-
-### Trekking Through Tropical Rainforests
-
-For nature enthusiasts, the lush forests of the Caribbean present an immersive encounter with biodiversity:
-
-- **El Yunque National Forest, Puerto Rico:** The only tropical rainforest within the U.S. National Forest System, El Yunque is rich in endemic species such as the Puerto Rican parrot and the famous coquí frog. Trails like the El Yunque Peak Trail and La Mina Falls Trail provide both challenging hikes and scenic rewards.
-- **Virgin Islands National Park, St. John:** With over 20 well-defined trails, this park offers hikes that reveal historical petroglyphs, colonial ruins, and stunning coastal views along the Reef Bay Trail.
-
-### Volcanic Peaks and Rugged Landscapes
-
-For those seeking more rugged challenges, several destinations offer unforgettable adventures:
-
-- **Morne Trois Pitons National Park, Dominica:** A UNESCO World Heritage Site showcasing volcanic landscapes, hot springs, the famed Boiling Lake, and lush trails that lead to hidden waterfalls.
-- **Gros Piton, Saint Lucia:** The iconic hike up Gros Piton provides a moderately challenging trek that ends with panoramic views of the Caribbean Sea, a truly rewarding experience for hikers.
-- **La Soufrière, St. Vincent:** This active volcano not only offers a dynamic hiking environment but also the opportunity to observe the ongoing geological transformations up close.
-
-Other noteworthy hiking spots include the Blue Mountains in Jamaica for coffee plantation tours and expansive views, as well as trails in Martinique around Montagne Pelée, which combine historical context with natural beauty.
-
----
-
-## Diverse Water Sports Experiences
-
-While surfing and hiking attract a broad range of adventurers, the Caribbean also scores high on other water sports. Whether you're drawn to snorkeling, jet skiing, or wind- and kiteboarding, the islands offer a plethora of aquatic activities.
-
-### Snorkeling, Diving, and Jet Skiing
-
-Caribbean waters teem with life and color, making them ideal for underwater exploration:
-
-- **Bonaire:** Its protected marine parks serve as a magnet for divers and snorkelers. With vibrant coral reefs and diverse marine species, Bonaire is a top destination for those who appreciate the underwater world.
-- **Cayman Islands:** Unique attractions such as Stingray City provide opportunities to interact with friendly stingrays in clear, calm waters. Additionally, the Underwater Sculpture Park is an innovative blend of art and nature.
-- **The Bahamas:** In places like Eleuthera, excursions often cater to families and thrill-seekers alike. Options include jet ski rentals, where groups can explore hidden beaches and pristine coves while enjoying the vibrant marine life.
-
-### Kiteboarding and Windsurfing
-
-Harnessing the steady trade winds and warm Caribbean waters, several islands have become hubs for kiteboarding and windsurfing:
-
-- **Aruba:** Known as "One Happy Island," Aruba’s Fisherman's Huts area provides consistent winds, perfect for enthusiasts of windsurfing and kiteboarding alike.
-- **Cabarete, Dominican Republic and Silver Rock, Barbados:** Both destinations benefit from reliable trade winds, making them popular among kitesurfers. These spots often combine water sports with a lively beach culture, ensuring that the fun continues on land as well.
-
-Local operators provide equipment rental and lessons, ensuring that even first-time adventurers can safely and confidently enjoy these exciting sports.
-
----
-
-## Combining Adventures: Multi-Activity Destinations
-
-For travelers seeking a comprehensive vacation where surfing, hiking, and water sports converge, several Caribbean destinations offer the best of all worlds.
-
-- **Puerto Rico:** With its robust surf scene in Rincón, world-class hiking in El Yunque, and opportunities for snorkeling and jet skiing in San Juan Bay, Puerto Rico is a true multi-adventure destination.
-- **Barbados:** In addition to the surf breaks along its coasts, Barbados offers a mix of cultural events, local cuisine, and even hiking excursions to scenic rural areas, making for a well-rounded experience.
-- **Dominican Republic and Jamaica:** Both are renowned not only for their consistent surf conditions but also for expansive hiking trails and water sports. From the rugged landscapes of the Dominican Republic to Jamaica’s blend of cultural history and natural exploration, these islands allow travelers to mix and match activities seamlessly.
-
-Group tours and local guides further enhance these experiences, providing insider tips, safe excursions, and personalized itineraries that cater to multiple interests within one trip.
-
----
-
-## Practical Advice and Travel Tips
-
-### Weather and Timing
-
-- **Optimal Climate:** April offers ideal weather conditions across the Caribbean. With minimal rainfall and warm temperatures, it is a great time to schedule outdoor activities.
-- **Surfing Seasons:** While April marks the end of the prime surf season in some areas (like Rincón in Puerto Rico), many destinations maintain consistent conditions during this month.
-
-### Booking and Costs
-
-- **Surfing Lessons:** Expect to pay between $40 and $110 per session depending on the location. For instance, Puerto Rico typically charges around $75 for beginner lessons, while group lessons in the Dominican Republic average approximately $95.
-- **Equipment Rentals:** Pricing for jet ski, surfboard, and snorkeling equipment may vary. In the Bahamas, an hour-long jet ski tour might cost about $120 per group, whereas a similar experience might be available at a lower cost in other regions.
-- **Accommodations:** Prices also vary by island. Many travelers find that even affordable stays do not skimp on amenities, allowing you to invest more in guided excursions and local experiences.
-
-### Cultural Considerations
-
-- **Festivals and Events:** Check local event calendars. Destinations like Barbados and Antigua host festivals in April that combine cultural heritage with festive outdoor activities.
-- **Local Cuisine:** Incorporate food tours into your itinerary. Caribbean cuisine—with its fusion of flavors—can be as adventurous as the outdoor activities.
-
-### Health and Safety
-
-- **Staying Hydrated:** The warm temperatures demand that you stay properly hydrated. Always carry water, especially during long hikes.
-- **Sun Protection:** Use sunscreen, hats, and sunglasses to protect yourself during extended periods outdoors on both land and water.
-- **Local Guides:** Utilize local tour operators for both hiking and water sports. Their expertise not only enriches your experience but also ensures safety in unfamiliar terrain or water bodies.
-
----
-
-## Conclusion
-
-The Caribbean in April is a haven for adventure seekers. With its pristine beaches, diverse ecosystems, and rich cultural tapestry, it offers something for every type of traveler. Whether you're chasing the perfect wave along the shores of Barbados and Puerto Rico, trekking through the lush landscapes of El Yunque or Morne Trois Pitons, or engaging in an array of water sports from snorkeling to kiteboarding, your ideal vacation is only a booking away.
-
-This report has outlined the best destinations and provided practical advice to optimize your vacation for surfing, hiking, and water sports. By considering the diverse offerings—from epic surf breaks and challenging hiking trails to vibrant water sports—the Caribbean stands out as a multi-adventure destination where every day brings a new experience.
-
-Plan carefully, pack wisely, and get ready to explore the vibrant mosaic of landscapes and activities that make the Caribbean in April a truly unforgettable adventure.
-
-Happy travels!
-
----
-
-_References available upon request. Many insights were drawn from trusted sources including Lonely Planet, TravelPug, and various Caribbean-centric exploration sites, ensuring a well-rounded and practical guide for your vacation planning._
diff --git a/tests/examples/research_bot/sample_outputs/vacation.txt b/tests/examples/research_bot/sample_outputs/vacation.txt
deleted file mode 100644
index b2649981..00000000
--- a/tests/examples/research_bot/sample_outputs/vacation.txt
+++ /dev/null
@@ -1,206 +0,0 @@
-# Terminal output for a vacation related query. See vacation.md for final report.
-
-$ uv run python -m examples.research_bot.main
-What would you like to research? Caribbean vacation spots in April, optimizing for surfing, hiking and water sports
-View trace: https://platform.openai.com/traces/trace_....
-Starting research...
-✅ Will perform 15 searches
-✅ Searching... 15/15 completed
-✅ Finishing report...
-✅ Report summary
-
-This report provides an in-depth exploration of selected Caribbean vacation spots in April that are ideal for surfing, hiking, and water sports. Covering
-destinations from Barbados and Puerto Rico to the Bahamas and Jamaica, it examines favorable weather conditions, recommended surf breaks, scenic hiking
-trails, and various water sports activities. Detailed destination profiles, activity highlights, and travel tips are integrated to help travelers design a
-multi-adventure itinerary in the Caribbean during April.
-
-
-=====REPORT=====
-
-
-Report: # Caribbean Adventure in April: Surfing, Hiking, and Water Sports Exploration
-
-The Caribbean is renowned for its crystal-clear waters, vibrant culture, and diverse outdoor activities. April is an especially attractive month for visitors: warm temperatures, clear skies, and the promise of abundant activities. This report explores the best Caribbean destinations in April, with a focus on optimizing your vacation for surfing, hiking, and water sports.
-
----
-
-## Table of Contents
-
-1. [Introduction](#introduction)
-2. [Why April is the Perfect Time in the Caribbean](#why-april-is-the-perfect-time-in-the-caribbean)
-3. [Surfing in the Caribbean](#surfing-in-the-caribbean)
- - 3.1 [Barbados: The Tale of Two Coasts](#barbados-the-tale-of-two-coasts)
- - 3.2 [Puerto Rico: Rincón and Beyond](#puerto-rico-rinc%C3%B3n-and-beyond)
- - 3.3 [Dominican Republic and Other Hotspots](#dominican-republic-and-other-hotspots)
-4. [Hiking Adventures Across the Caribbean](#hiking-adventures-across-the-caribbean)
- - 4.1 [Trekking Through Tropical Rainforests](#trekking-through-tropical-rainforests)
- - 4.2 [Volcanic Peaks and Rugged Landscapes](#volcanic-peaks-and-rugged-landscapes)
-5. [Diverse Water Sports Experiences](#diverse-water-sports-experiences)
- - 5.1 [Snorkeling, Diving, and Jet Skiing](#snorkeling-diving-and-jet-skiing)
- - 5.2 [Kiteboarding and Windsurfing](#kiteboarding-and-windsurfing)
-6. [Combining Adventures: Multi-Activity Destinations](#combining-adventures-multi-activity-destinations)
-7. [Practical Advice and Travel Tips](#practical-advice-and-travel-tips)
-8. [Conclusion](#conclusion)
-
----
-
-## Introduction
-
-Caribbean vacations are much more than just beach relaxation; they offer adventure, exploration, and a lively cultural tapestry waiting to be discovered. For travelers seeking an adrenaline-filled getaway, April provides optimal conditions. This report synthesizes diverse research findings and travel insights to help you create an itinerary that combines the thrill of surfing, the challenge of hiking, and the excitement of water sports.
-
-Whether you're standing on the edge of a powerful reef break or trekking through lush tropical landscapes, the Caribbean in April invites you to dive into nature, adventure, and culture. The following sections break down the best destinations and activities, ensuring that every aspect of your trip is meticulously planned for an unforgettable experience.
-
----
-
-## Why April is the Perfect Time in the Caribbean
-
-April stands at the crossroads of seasons in many Caribbean destinations. It marks the tail end of the dry season, ensuring:
-
-- **Consistent Warm Temperatures:** Average daytime highs around 29°C (84°F) foster comfortable conditions for both land and water activities.
-- **Pleasant Sea Temperatures:** With sea temperatures near 26°C (79°F), swimmers, surfers, and divers are treated to inviting waters.
-- **Clear Skies and Minimal Rainfall:** Crisp, blue skies make for excellent visibility during snorkeling and diving, as well as clear panoramic views while hiking.
-- **Festivals and Cultural Events:** Many islands host seasonal festivals such as Barbados' Fish Festival and Antigua's Sailing Week, adding a cultural layer to your vacation.
-
-These factors create an ideal backdrop for balancing your outdoor pursuits, whether you’re catching epic waves, trekking rugged trails, or partaking in water sports.
-
----
-
-## Surfing in the Caribbean
-
-Surfing in the Caribbean offers diverse wave experiences, ranging from gentle, beginner-friendly rollers to powerful reef breaks that challenge even seasoned surfers. April, in particular, provides excellent conditions for those looking to ride its picturesque waves.
-
-### Barbados: The Tale of Two Coasts
-
-Barbados is a prime destination:
-
-- **Soup Bowl in Bathsheba:** On the east coast, the Soup Bowl is famous for its consistent, powerful waves. This spot attracts experienced surfers who appreciate its challenging right-hand reef break with steep drops, providing the kind of performance wave rarely found elsewhere.
-- **Freights Bay:** On the south coast, visitors find more forgiving, gentle wave conditions. Ideal for beginners and longboarders, this spot offers the perfect balance for those still mastering their craft.
-
-Barbados not only excels in its surfing credentials but also complements the experience with a rich local culture and events in April, making it a well-rounded destination.
-
-### Puerto Rico: Rincón and Beyond
-
-Rincón in Puerto Rico is hailed as the Caribbean’s surfing capital:
-
-- **Diverse Breaks:** With spots ranging from challenging reef breaks such as Tres Palmas and Dogman's to more inviting waves at Domes and Maria's, Puerto Rico offers a spectrum for all surfing skill levels.
-- **Local Culture:** Aside from its surf culture, the island boasts vibrant local food scenes, historic sites, and exciting nightlife, enriching your overall travel experience.
-
-In addition, Puerto Rico’s coasts often feature opportunities for hiking and other outdoor adventures, making it an attractive option for multi-activity travelers.
-
-### Dominican Republic and Other Hotspots
-
-Other islands such as the Dominican Republic, with Playa Encuentro on its north coast, provide consistent surf year-round. Highlights include:
-
-- **Playa Encuentro:** A hotspot known for its dependable breaks, ideal for both intermediate and advanced surfers during the cooler months of October to April.
-- **Jamaica and The Bahamas:** Jamaica’s Boston Bay offers a mix of beginner and intermediate waves, and The Bahamas’ Surfer’s Beach on Eleuthera draws parallels to the legendary surf spots of Hawaii, especially during the winter months.
-
-These destinations not only spotlight surfing but also serve as gateways to additional outdoor activities, ensuring there's never a dull moment whether you're balancing waves with hikes or cultural exploration.
-
----
-
-## Hiking Adventures Across the Caribbean
-
-The Caribbean's topography is as varied as it is beautiful. Its network of hiking trails traverses volcanic peaks, ancient rainforests, and dramatic coastal cliffs, offering breathtaking vistas to intrepid explorers.
-
-### Trekking Through Tropical Rainforests
-
-For nature enthusiasts, the lush forests of the Caribbean present an immersive encounter with biodiversity:
-
-- **El Yunque National Forest, Puerto Rico:** The only tropical rainforest within the U.S. National Forest System, El Yunque is rich in endemic species such as the Puerto Rican parrot and the famous coquí frog. Trails like the El Yunque Peak Trail and La Mina Falls Trail provide both challenging hikes and scenic rewards.
-- **Virgin Islands National Park, St. John:** With over 20 well-defined trails, this park offers hikes that reveal historical petroglyphs, colonial ruins, and stunning coastal views along the Reef Bay Trail.
-
-### Volcanic Peaks and Rugged Landscapes
-
-For those seeking more rugged challenges, several destinations offer unforgettable adventures:
-
-- **Morne Trois Pitons National Park, Dominica:** A UNESCO World Heritage Site showcasing volcanic landscapes, hot springs, the famed Boiling Lake, and lush trails that lead to hidden waterfalls.
-- **Gros Piton, Saint Lucia:** The iconic hike up Gros Piton provides a moderately challenging trek that ends with panoramic views of the Caribbean Sea, a truly rewarding experience for hikers.
-- **La Soufrière, St. Vincent:** This active volcano not only offers a dynamic hiking environment but also the opportunity to observe the ongoing geological transformations up close.
-
-Other noteworthy hiking spots include the Blue Mountains in Jamaica for coffee plantation tours and expansive views, as well as trails in Martinique around Montagne Pelée, which combine historical context with natural beauty.
-
----
-
-## Diverse Water Sports Experiences
-
-While surfing and hiking attract a broad range of adventurers, the Caribbean also scores high on other water sports. Whether you're drawn to snorkeling, jet skiing, or wind- and kiteboarding, the islands offer a plethora of aquatic activities.
-
-### Snorkeling, Diving, and Jet Skiing
-
-Caribbean waters teem with life and color, making them ideal for underwater exploration:
-
-- **Bonaire:** Its protected marine parks serve as a magnet for divers and snorkelers. With vibrant coral reefs and diverse marine species, Bonaire is a top destination for those who appreciate the underwater world.
-- **Cayman Islands:** Unique attractions such as Stingray City provide opportunities to interact with friendly stingrays in clear, calm waters. Additionally, the Underwater Sculpture Park is an innovative blend of art and nature.
-- **The Bahamas:** In places like Eleuthera, excursions often cater to families and thrill-seekers alike. Options include jet ski rentals, where groups can explore hidden beaches and pristine coves while enjoying the vibrant marine life.
-
-### Kiteboarding and Windsurfing
-
-Harnessing the steady trade winds and warm Caribbean waters, several islands have become hubs for kiteboarding and windsurfing:
-
-- **Aruba:** Known as "One Happy Island," Aruba’s Fisherman's Huts area provides consistent winds, perfect for enthusiasts of windsurfing and kiteboarding alike.
-- **Cabarete, Dominican Republic and Silver Rock, Barbados:** Both destinations benefit from reliable trade winds, making them popular among kitesurfers. These spots often combine water sports with a lively beach culture, ensuring that the fun continues on land as well.
-
-Local operators provide equipment rental and lessons, ensuring that even first-time adventurers can safely and confidently enjoy these exciting sports.
-
----
-
-## Combining Adventures: Multi-Activity Destinations
-
-For travelers seeking a comprehensive vacation where surfing, hiking, and water sports converge, several Caribbean destinations offer the best of all worlds.
-
-- **Puerto Rico:** With its robust surf scene in Rincón, world-class hiking in El Yunque, and opportunities for snorkeling and jet skiing in San Juan Bay, Puerto Rico is a true multi-adventure destination.
-- **Barbados:** In addition to the surf breaks along its coasts, Barbados offers a mix of cultural events, local cuisine, and even hiking excursions to scenic rural areas, making for a well-rounded experience.
-- **Dominican Republic and Jamaica:** Both are renowned not only for their consistent surf conditions but also for expansive hiking trails and water sports. From the rugged landscapes of the Dominican Republic to Jamaica’s blend of cultural history and natural exploration, these islands allow travelers to mix and match activities seamlessly.
-
-Group tours and local guides further enhance these experiences, providing insider tips, safe excursions, and personalized itineraries that cater to multiple interests within one trip.
-
----
-
-## Practical Advice and Travel Tips
-
-### Weather and Timing
-
-- **Optimal Climate:** April offers ideal weather conditions across the Caribbean. With minimal rainfall and warm temperatures, it is a great time to schedule outdoor activities.
-- **Surfing Seasons:** While April marks the end of the prime surf season in some areas (like Rincón in Puerto Rico), many destinations maintain consistent conditions during this month.
-
-### Booking and Costs
-
-- **Surfing Lessons:** Expect to pay between $40 and $110 per session depending on the location. For instance, Puerto Rico typically charges around $75 for beginner lessons, while group lessons in the Dominican Republic average approximately $95.
-- **Equipment Rentals:** Pricing for jet ski, surfboard, and snorkeling equipment may vary. In the Bahamas, an hour-long jet ski tour might cost about $120 per group, whereas a similar experience might be available at a lower cost in other regions.
-- **Accommodations:** Prices also vary by island. Many travelers find that even affordable stays do not skimp on amenities, allowing you to invest more in guided excursions and local experiences.
-
-### Cultural Considerations
-
-- **Festivals and Events:** Check local event calendars. Destinations like Barbados and Antigua host festivals in April that combine cultural heritage with festive outdoor activities.
-- **Local Cuisine:** Incorporate food tours into your itinerary. Caribbean cuisine—with its fusion of flavors—can be as adventurous as the outdoor activities.
-
-### Health and Safety
-
-- **Staying Hydrated:** The warm temperatures demand that you stay properly hydrated. Always carry water, especially during long hikes.
-- **Sun Protection:** Use sunscreen, hats, and sunglasses to protect yourself during extended periods outdoors on both land and water.
-- **Local Guides:** Utilize local tour operators for both hiking and water sports. Their expertise not only enriches your experience but also ensures safety in unfamiliar terrain or water bodies.
-
----
-
-## Conclusion
-
-The Caribbean in April is a haven for adventure seekers. With its pristine beaches, diverse ecosystems, and rich cultural tapestry, it offers something for every type of traveler. Whether you're chasing the perfect wave along the shores of Barbados and Puerto Rico, trekking through the lush landscapes of El Yunque or Morne Trois Pitons, or engaging in an array of water sports from snorkeling to kiteboarding, your ideal vacation is only a booking away.
-
-This report has outlined the best destinations and provided practical advice to optimize your vacation for surfing, hiking, and water sports. By considering the diverse offerings—from epic surf breaks and challenging hiking trails to vibrant water sports—the Caribbean stands out as a multi-adventure destination where every day brings a new experience.
-
-Plan carefully, pack wisely, and get ready to explore the vibrant mosaic of landscapes and activities that make the Caribbean in April a truly unforgettable adventure.
-
-Happy travels!
-
----
-
-*References available upon request. Many insights were drawn from trusted sources including Lonely Planet, TravelPug, and various Caribbean-centric exploration sites, ensuring a well-rounded and practical guide for your vacation planning.*
-
-
-
-=====FOLLOW UP QUESTIONS=====
-
-
-Follow up questions: Would you like detailed profiles for any of the highlighted destinations (e.g., Puerto Rico or Barbados)?
-Are you interested in more information about booking details and local tour operators in specific islands?
-Do you need guidance on combining cultural events with outdoor adventures during your Caribbean vacation?
\ No newline at end of file
diff --git a/tests/examples/tools/computer_use.py b/tests/examples/tools/computer_use.py
deleted file mode 100644
index ae339552..00000000
--- a/tests/examples/tools/computer_use.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import asyncio
-import base64
-import logging
-from typing import Literal, Union
-
-from playwright.async_api import Browser, Page, Playwright, async_playwright
-
-from agents import (
- Agent,
- AsyncComputer,
- Button,
- ComputerTool,
- Environment,
- ModelSettings,
- Runner,
- trace,
-)
-
-logging.getLogger("openai.agents").setLevel(logging.DEBUG)
-logging.getLogger("openai.agents").addHandler(logging.StreamHandler())
-
-
-async def main():
- async with LocalPlaywrightComputer() as computer:
- with trace("Computer use example"):
- agent = Agent(
- name="Browser user",
- instructions="You are a helpful agent.",
- tools=[ComputerTool(computer)],
- # Use the computer-use model, and set truncation to "auto" because it's required for this model
- model="computer-use-preview",
- model_settings=ModelSettings(truncation="auto"),
- )
- result = await Runner.run(agent, "Search for SF sports news and summarize.")
- print(result.final_output)
-
-
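-# Map the model's CUA key names to the names Playwright's keyboard API expects;
-# keypress() below falls back to the raw key when a name is not in this table.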
-CUA_KEY_TO_PLAYWRIGHT_KEY = {
- "/": "Divide",
- "\\": "Backslash",
- "alt": "Alt",
- "arrowdown": "ArrowDown",
- "arrowleft": "ArrowLeft",
- "arrowright": "ArrowRight",
- "arrowup": "ArrowUp",
- "backspace": "Backspace",
- "capslock": "CapsLock",
- "cmd": "Meta",
- "ctrl": "Control",
- "delete": "Delete",
- "end": "End",
- "enter": "Enter",
- "esc": "Escape",
- "home": "Home",
- "insert": "Insert",
- "option": "Alt",
- "pagedown": "PageDown",
- "pageup": "PageUp",
- "shift": "Shift",
- "space": " ",
- "super": "Meta",
- "tab": "Tab",
- "win": "Meta",
-}
-
-
-class LocalPlaywrightComputer(AsyncComputer):
- """A computer, implemented using a local Playwright browser."""
-
- def __init__(self):
- self._playwright: Union[Playwright, None] = None
- self._browser: Union[Browser, None] = None
- self._page: Union[Page, None] = None
-
- async def _get_browser_and_page(self) -> tuple[Browser, Page]:
- width, height = self.dimensions
- launch_args = [f"--window-size={width},{height}"]
- browser = await self.playwright.chromium.launch(headless=False, args=launch_args)
- page = await browser.new_page()
- await page.set_viewport_size({"width": width, "height": height})
- await page.goto("https://www.bing.com")
- return browser, page
-
- async def __aenter__(self):
- # Start Playwright and call the subclass hook for getting browser/page
- self._playwright = await async_playwright().start()
- self._browser, self._page = await self._get_browser_and_page()
- return self
-
- async def __aexit__(self, exc_type, exc_val, exc_tb):
- if self._browser:
- await self._browser.close()
- if self._playwright:
- await self._playwright.stop()
-
- @property
- def playwright(self) -> Playwright:
- assert self._playwright is not None
- return self._playwright
-
- @property
- def browser(self) -> Browser:
- assert self._browser is not None
- return self._browser
-
- @property
- def page(self) -> Page:
- assert self._page is not None
- return self._page
-
- @property
- def environment(self) -> Environment:
- return "browser"
-
- @property
- def dimensions(self) -> tuple[int, int]:
- return (1024, 768)
-
- async def screenshot(self) -> str:
- """Capture only the viewport (not full_page)."""
- png_bytes = await self.page.screenshot(full_page=False)
- return base64.b64encode(png_bytes).decode("utf-8")
-
- async def click(self, x: int, y: int, button: Button = "left") -> None:
- playwright_button: Literal["left", "middle", "right"] = "left"
-
- # Playwright only supports left, middle, right buttons
- if button in ("left", "right", "middle"):
- playwright_button = button # type: ignore
-
- await self.page.mouse.click(x, y, button=playwright_button)
-
- async def double_click(self, x: int, y: int) -> None:
- await self.page.mouse.dblclick(x, y)
-
- async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
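- # Move the pointer to (x, y), then scroll the page content via JavaScript.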
- await self.page.mouse.move(x, y)
- await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
-
- async def type(self, text: str) -> None:
- await self.page.keyboard.type(text)
-
- async def wait(self) -> None:
- await asyncio.sleep(1)
-
- async def move(self, x: int, y: int) -> None:
- await self.page.mouse.move(x, y)
-
- async def keypress(self, keys: list[str]) -> None:
- for key in keys:
- mapped_key = CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key)
- await self.page.keyboard.press(mapped_key)
-
- async def drag(self, path: list[tuple[int, int]]) -> None:
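- # Press at the first point in the path, move through the rest, then release.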
- if not path:
- return
- await self.page.mouse.move(path[0][0], path[0][1])
- await self.page.mouse.down()
- for px, py in path[1:]:
- await self.page.mouse.move(px, py)
- await self.page.mouse.up()
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/tools/file_search.py b/tests/examples/tools/file_search.py
deleted file mode 100644
index 2a3d4cf1..00000000
--- a/tests/examples/tools/file_search.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import asyncio
-
-from agents import Agent, FileSearchTool, Runner, trace
-
-
-async def main():
- agent = Agent(
- name="File searcher",
- instructions="You are a helpful agent.",
- tools=[
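- # The vector store ID below is from the original example; FileSearchTool queries
- # existing vector stores, so substitute the ID of one you have created.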
- FileSearchTool(
- max_num_results=3,
- vector_store_ids=["vs_67bf88953f748191be42b462090e53e7"],
- include_search_results=True,
- )
- ],
- )
-
- with trace("File search example"):
- result = await Runner.run(
- agent, "Be concise, and tell me 1 sentence about Arrakis I might not know."
- )
- print(result.final_output)
- """
- Arrakis, the desert planet in Frank Herbert's "Dune," was inspired by the scarcity of water
- as a metaphor for oil and other finite resources.
- """
-
- print("\n".join([str(out) for out in result.new_items]))
- """
- {"id":"...", "queries":["Arrakis"], "results":[...]}
- """
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/examples/tools/web_search.py b/tests/examples/tools/web_search.py
deleted file mode 100644
index 35eeb680..00000000
--- a/tests/examples/tools/web_search.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import asyncio
-
-from agents import Agent, Runner, WebSearchTool, trace
-
-
-async def main():
- agent = Agent(
- name="Web searcher",
- instructions="You are a helpful agent.",
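- # An approximate user_location localizes search results (here, New York).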
- tools=[WebSearchTool(user_location={"type": "approximate", "city": "New York"})],
- )
-
- with trace("Web search example"):
- result = await Runner.run(
- agent,
- "search the web for 'local sports news' and give me 1 interesting update in a sentence.",
- )
- print(result.final_output)
- # The New York Giants are reportedly pursuing quarterback Aaron Rodgers after his ...
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/tests/mkdocs.yml b/tests/mkdocs.yml
deleted file mode 100644
index 398fb74a..00000000
--- a/tests/mkdocs.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-site_name: OpenAI Agents SDK
-theme:
- name: material
- features:
- # Allows copying code blocks
- - content.code.copy
- # Allows selecting code blocks
- - content.code.select
- # Shows the current path in the sidebar
- - navigation.path
- # Shows sections in the sidebar
- - navigation.sections
- # Shows sections expanded by default
- - navigation.expand
- # Enables annotations in code blocks
- - content.code.annotate
- palette:
- primary: black
- logo: assets/logo.svg
- favicon: images/favicon-platform.svg
-nav:
- - Intro: index.md
- - Quickstart: quickstart.md
- - Documentation:
- - agents.md
- - running_agents.md
- - results.md
- - streaming.md
- - tools.md
- - handoffs.md
- - tracing.md
- - context.md
- - guardrails.md
- - multi_agent.md
- - models.md
- - config.md
- - API Reference:
- - Agents:
- - ref/index.md
- - ref/agent.md
- - ref/run.md
- - ref/tool.md
- - ref/result.md
- - ref/stream_events.md
- - ref/handoffs.md
- - ref/lifecycle.md
- - ref/items.md
- - ref/run_context.md
- - ref/usage.md
- - ref/exceptions.md
- - ref/guardrail.md
- - ref/model_settings.md
- - ref/agent_output.md
- - ref/function_schema.md
- - ref/models/interface.md
- - ref/models/openai_chatcompletions.md
- - ref/models/openai_responses.md
- - Tracing:
- - ref/tracing/index.md
- - ref/tracing/create.md
- - ref/tracing/traces.md
- - ref/tracing/spans.md
- - ref/tracing/processor_interface.md
- - ref/tracing/processors.md
- - ref/tracing/scope.md
- - ref/tracing/setup.md
- - ref/tracing/span_data.md
- - ref/tracing/util.md
- - Extensions:
- - ref/extensions/handoff_filters.md
- - ref/extensions/handoff_prompt.md
-
-plugins:
- - search
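- # mkdocstrings generates the API Reference pages from the docstrings under src/agents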
- - mkdocstrings:
- handlers:
- python:
- paths: ["src/agents"]
- selection:
- docstring_style: google
- options:
- # Shows links to other members in signatures
- signature_crossrefs: true
- # Orders members by source order, rather than alphabetical
- members_order: source
- # Puts the signature on a separate line from the member name
- separate_signature: true
- # Shows type annotations in signatures
- show_signature_annotations: true
- # Makes the font sizes nicer
- heading_level: 3
-
-extra:
- # Remove material generation message in footer
- generator: false
-
-markdown_extensions:
- - admonition
- - pymdownx.details
- - pymdownx.superfences
- - attr_list
- - md_in_html
- - pymdownx.highlight:
- anchor_linenums: true
- line_spans: __span
- pygments_lang_class: true
- - pymdownx.inlinehilite
- - pymdownx.snippets
- - pymdownx.superfences
-
-validation:
- omitted_files: warn
- absolute_links: warn
- unrecognized_links: warn
- anchors: warn
-
-extra_css:
- - stylesheets/extra.css
-
-watch:
- - "src/agents"
diff --git a/tests/pyproject.toml b/tests/pyproject.toml
deleted file mode 100644
index 24e08eb7..00000000
--- a/tests/pyproject.toml
+++ /dev/null
@@ -1,119 +0,0 @@
-[project]
-name = "openai-agents"
-version = "0.0.1"
-description = "OpenAI Agents SDK"
-readme = "README.md"
-requires-python = ">=3.9"
-license = "MIT"
-authors = [
- { name = "OpenAI", email = "support@openai.com" },
-]
-dependencies = [
- "openai>=1.66.0",
- "pydantic>=2.10, <3",
- "griffe>=1.5.6, <2",
- "typing-extensions>=4.12.2, <5",
- "requests>=2.0, <3",
- "types-requests>=2.0, <3",
-]
-classifiers = [
- "Typing :: Typed",
- "Intended Audience :: Developers",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- "Intended Audience :: Developers",
- "Operating System :: OS Independent",
- "Topic :: Software Development :: Libraries :: Python Modules",
- "License :: OSI Approved :: MIT License"
-]
-
-[project.urls]
-Homepage = "https://github.com/openai/openai-agents-python"
-Repository = "https://github.com/openai/openai-agents-python"
-
-[dependency-groups]
-dev = [
- "mypy",
- "ruff==0.9.2",
- "pytest",
- "pytest-asyncio",
- "pytest-mock>=3.14.0",
- "rich",
- "mkdocs>=1.6.0",
- "mkdocs-material>=9.6.0",
- "mkdocstrings[python]>=0.28.0",
- "coverage>=7.6.12",
- "playwright==1.50.0",
-]
-[tool.uv.workspace]
-members = ["agents"]
-
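-# uv resolves the agents dependency from this workspace checkout instead of a registry.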
-[tool.uv.sources]
-agents = { workspace = true }
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-
-[tool.hatch.build.targets.wheel]
-packages = ["src/agents"]
-
-
-[tool.ruff]
-line-length = 100
-target-version = "py39"
-
-[tool.ruff.lint]
-select = [
- "E", # pycodestyle errors
- "W", # pycodestyle warnings
- "F", # pyflakes
- "I", # isort
- "B", # flake8-bugbear
- "C4", # flake8-comprehensions
- "UP", # pyupgrade
-]
-isort = { combine-as-imports = true, known-first-party = ["agents"] }
-
-[tool.ruff.lint.pydocstyle]
-convention = "google"
-
-[tool.ruff.lint.per-file-ignores]
-"examples/**/*.py" = ["E501"]
-
-[tool.mypy]
-strict = true
-disallow_incomplete_defs = false
-disallow_untyped_defs = false
-disallow_untyped_calls = false
-
-[tool.coverage.run]
-source = [
- "tests",
- "src/agents",
-]
-
-[tool.coverage.report]
-show_missing = true
-sort = "-Cover"
-exclude_also = [
- # This is only executed while typechecking
- "if TYPE_CHECKING:",
- "@abc.abstractmethod",
- "raise NotImplementedError",
- "logger.debug",
-]
-
-[tool.pytest.ini_options]
-asyncio_mode = "auto"
-asyncio_default_fixture_loop_scope = "session"
-filterwarnings = [
- # This is a warning that is expected to happen: we have an async filter that raises an exception
- "ignore:coroutine 'test_async_input_filter_fails..invalid_input_filter' was never awaited:RuntimeWarning",
-]
-markers = [
- "allow_call_model_methods: mark test as allowing calls to real model implementations",
-]
\ No newline at end of file
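
The deleted pytest configuration set `asyncio_mode = "auto"`, which lets pytest-asyncio collect and await coroutine tests without per-test decorators. A minimal sketch of what that enables (the test name is illustrative):

import asyncio


async def test_event_roundtrip():
    # With asyncio_mode = "auto", pytest-asyncio awaits this coroutine
    # directly; no @pytest.mark.asyncio decorator is needed.
    event = asyncio.Event()
    event.set()
    await asyncio.wait_for(event.wait(), timeout=1)
    assert event.is_set()
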
diff --git a/tests/src/agents/__init__.py b/tests/src/agents/__init__.py
deleted file mode 100644
index 69c500ab..00000000
--- a/tests/src/agents/__init__.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import logging
-import sys
-from typing import Literal
-
-from openai import AsyncOpenAI
-
-from . import _config
-from .agent import Agent
-from .agent_output import AgentOutputSchema
-from .computer import AsyncComputer, Button, Computer, Environment
-from .exceptions import (
- AgentsException,
- InputGuardrailTripwireTriggered,
- MaxTurnsExceeded,
- ModelBehaviorError,
- OutputGuardrailTripwireTriggered,
- UserError,
-)
-from .guardrail import (
- GuardrailFunctionOutput,
- InputGuardrail,
- InputGuardrailResult,
- OutputGuardrail,
- OutputGuardrailResult,
- input_guardrail,
- output_guardrail,
-)
-from .handoffs import Handoff, HandoffInputData, HandoffInputFilter, handoff
-from .items import (
- HandoffCallItem,
- HandoffOutputItem,
- ItemHelpers,
- MessageOutputItem,
- ModelResponse,
- ReasoningItem,
- RunItem,
- ToolCallItem,
- ToolCallOutputItem,
- TResponseInputItem,
-)
-from .lifecycle import AgentHooks, RunHooks
-from .model_settings import ModelSettings
-from .models.interface import Model, ModelProvider, ModelTracing
-from .models.openai_chatcompletions import OpenAIChatCompletionsModel
-from .models.openai_provider import OpenAIProvider
-from .models.openai_responses import OpenAIResponsesModel
-from .result import RunResult, RunResultStreaming
-from .run import RunConfig, Runner
-from .run_context import RunContextWrapper, TContext
-from .stream_events import (
- AgentUpdatedStreamEvent,
- RawResponsesStreamEvent,
- RunItemStreamEvent,
- StreamEvent,
-)
-from .tool import (
- ComputerTool,
- FileSearchTool,
- FunctionTool,
- Tool,
- WebSearchTool,
- default_tool_error_function,
- function_tool,
-)
-from .tracing import (
- AgentSpanData,
- CustomSpanData,
- FunctionSpanData,
- GenerationSpanData,
- GuardrailSpanData,
- HandoffSpanData,
- Span,
- SpanData,
- SpanError,
- Trace,
- add_trace_processor,
- agent_span,
- custom_span,
- function_span,
- gen_span_id,
- gen_trace_id,
- generation_span,
- get_current_span,
- get_current_trace,
- guardrail_span,
- handoff_span,
- set_trace_processors,
- set_tracing_disabled,
- set_tracing_export_api_key,
- trace,
-)
-from .usage import Usage
-
-
-def set_default_openai_key(key: str) -> None:
- """Set the default OpenAI API key to use for LLM requests and tracing. This is only necessary if
- the OPENAI_API_KEY environment variable is not already set.
-
- If provided, this key will be used instead of the OPENAI_API_KEY environment variable.
- """
- _config.set_default_openai_key(key)
-
-
-def set_default_openai_client(client: AsyncOpenAI, use_for_tracing: bool = True) -> None:
- """Set the default OpenAI client to use for LLM requests and/or tracing. If provided, this
- client will be used instead of the default OpenAI client.
-
- Args:
- client: The OpenAI client to use.
- use_for_tracing: Whether to use the API key from this client for uploading traces. If False,
- you'll either need to set the OPENAI_API_KEY environment variable or call
- set_tracing_export_api_key() with the API key you want to use for tracing.
- """
- _config.set_default_openai_client(client, use_for_tracing)
-
-
-def set_default_openai_api(api: Literal["chat_completions", "responses"]) -> None:
- """Set the default API to use for OpenAI LLM requests. By default, we will use the responses API
- but you can set this to use the chat completions API instead.
- """
- _config.set_default_openai_api(api)
-
-
-def enable_verbose_stdout_logging():
- """Enables verbose logging to stdout. This is useful for debugging."""
- for name in ["openai.agents", "openai.agents.tracing"]:
- logger = logging.getLogger(name)
- logger.setLevel(logging.DEBUG)
- logger.addHandler(logging.StreamHandler(sys.stdout))
-
-
-__all__ = [
- "Agent",
- "Runner",
- "Model",
- "ModelProvider",
- "ModelTracing",
- "ModelSettings",
- "OpenAIChatCompletionsModel",
- "OpenAIProvider",
- "OpenAIResponsesModel",
- "AgentOutputSchema",
- "Computer",
- "AsyncComputer",
- "Environment",
- "Button",
- "AgentsException",
- "InputGuardrailTripwireTriggered",
- "OutputGuardrailTripwireTriggered",
- "MaxTurnsExceeded",
- "ModelBehaviorError",
- "UserError",
- "InputGuardrail",
- "InputGuardrailResult",
- "OutputGuardrail",
- "OutputGuardrailResult",
- "GuardrailFunctionOutput",
- "input_guardrail",
- "output_guardrail",
- "handoff",
- "Handoff",
- "HandoffInputData",
- "HandoffInputFilter",
- "TResponseInputItem",
- "MessageOutputItem",
- "ModelResponse",
- "RunItem",
- "HandoffCallItem",
- "HandoffOutputItem",
- "ToolCallItem",
- "ToolCallOutputItem",
- "ReasoningItem",
- "ModelResponse",
- "ItemHelpers",
- "RunHooks",
- "AgentHooks",
- "RunContextWrapper",
- "TContext",
- "RunResult",
- "RunResultStreaming",
- "RunConfig",
- "RawResponsesStreamEvent",
- "RunItemStreamEvent",
- "AgentUpdatedStreamEvent",
- "StreamEvent",
- "FunctionTool",
- "ComputerTool",
- "FileSearchTool",
- "Tool",
- "WebSearchTool",
- "function_tool",
- "Usage",
- "add_trace_processor",
- "agent_span",
- "custom_span",
- "function_span",
- "generation_span",
- "get_current_span",
- "get_current_trace",
- "guardrail_span",
- "handoff_span",
- "set_trace_processors",
- "set_tracing_disabled",
- "trace",
- "Trace",
- "SpanError",
- "Span",
- "SpanData",
- "AgentSpanData",
- "CustomSpanData",
- "FunctionSpanData",
- "GenerationSpanData",
- "GuardrailSpanData",
- "HandoffSpanData",
- "set_default_openai_key",
- "set_default_openai_client",
- "set_default_openai_api",
- "set_tracing_export_api_key",
- "enable_verbose_stdout_logging",
- "gen_trace_id",
- "gen_span_id",
- "default_tool_error_function",
-]
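
The removed `__init__.py` defined the package's public surface. A minimal usage sketch, assuming the package is importable as `agents` and `OPENAI_API_KEY` is set; the agent name and instructions are illustrative:

import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="assistant", instructions="Reply in one short sentence.")
    # Runner.run drives the agent loop and returns a RunResult.
    result = await Runner.run(starting_agent=agent, input="Say hello.")
    print(result.final_output)


if __name__ == "__main__":
    asyncio.run(main())
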
diff --git a/tests/src/agents/_config.py b/tests/src/agents/_config.py
deleted file mode 100644
index 55ded64d..00000000
--- a/tests/src/agents/_config.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from openai import AsyncOpenAI
-from typing_extensions import Literal
-
-from .models import _openai_shared
-from .tracing import set_tracing_export_api_key
-
-
-def set_default_openai_key(key: str) -> None:
- set_tracing_export_api_key(key)
- _openai_shared.set_default_openai_key(key)
-
-
-def set_default_openai_client(client: AsyncOpenAI, use_for_tracing: bool) -> None:
- if use_for_tracing:
- set_tracing_export_api_key(client.api_key)
- _openai_shared.set_default_openai_client(client)
-
-
-def set_default_openai_api(api: Literal["chat_completions", "responses"]) -> None:
- if api == "chat_completions":
- _openai_shared.set_use_responses_by_default(False)
- else:
- _openai_shared.set_use_responses_by_default(True)
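
As the deleted `_config.py` shows, the tracing exporter reuses the default client's API key unless `use_for_tracing` is False. A sketch of keeping the two keys separate (the key values and base URL are placeholders):

from openai import AsyncOpenAI

from agents import set_default_openai_client, set_tracing_export_api_key

# Use a custom client for LLM calls, but keep trace export on its own key.
client = AsyncOpenAI(api_key="sk-placeholder", base_url="https://proxy.example.com/v1")
set_default_openai_client(client, use_for_tracing=False)
set_tracing_export_api_key("sk-tracing-placeholder")
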
diff --git a/tests/src/agents/_debug.py b/tests/src/agents/_debug.py
deleted file mode 100644
index 4da91be4..00000000
--- a/tests/src/agents/_debug.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import os
-
-
-def _debug_flag_enabled(flag: str) -> bool:
- flag_value = os.getenv(flag)
- return flag_value is not None and (flag_value == "1" or flag_value.lower() == "true")
-
-
-DONT_LOG_MODEL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA")
-"""By default we don't log LLM inputs/outputs, to prevent exposing sensitive information. Set this
-flag to enable logging them.
-"""
-
-DONT_LOG_TOOL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_TOOL_DATA")
-"""By default we don't log tool call inputs/outputs, to prevent exposing sensitive information. Set
-this flag to enable logging them.
-"""
diff --git a/tests/src/agents/_run_impl.py b/tests/src/agents/_run_impl.py
deleted file mode 100644
index 112819c8..00000000
--- a/tests/src/agents/_run_impl.py
+++ /dev/null
@@ -1,792 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
-
-from openai.types.responses import (
- ResponseComputerToolCall,
- ResponseFileSearchToolCall,
- ResponseFunctionToolCall,
- ResponseFunctionWebSearch,
- ResponseOutputMessage,
-)
-from openai.types.responses.response_computer_tool_call import (
- ActionClick,
- ActionDoubleClick,
- ActionDrag,
- ActionKeypress,
- ActionMove,
- ActionScreenshot,
- ActionScroll,
- ActionType,
- ActionWait,
-)
-from openai.types.responses.response_input_param import ComputerCallOutput
-from openai.types.responses.response_output_item import Reasoning
-
-from . import _utils
-from .agent import Agent
-from .agent_output import AgentOutputSchema
-from .computer import AsyncComputer, Computer
-from .exceptions import AgentsException, ModelBehaviorError, UserError
-from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult
-from .handoffs import Handoff, HandoffInputData
-from .items import (
- HandoffCallItem,
- HandoffOutputItem,
- ItemHelpers,
- MessageOutputItem,
- ModelResponse,
- ReasoningItem,
- RunItem,
- ToolCallItem,
- ToolCallOutputItem,
- TResponseInputItem,
-)
-from .lifecycle import RunHooks
-from .logger import logger
-from .models.interface import ModelTracing
-from .run_context import RunContextWrapper, TContext
-from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool
-from .tracing import (
- SpanError,
- Trace,
- function_span,
- get_current_trace,
- guardrail_span,
- handoff_span,
- trace,
-)
-
-if TYPE_CHECKING:
- from .run import RunConfig
-
-
-class QueueCompleteSentinel:
- pass
-
-
-QUEUE_COMPLETE_SENTINEL = QueueCompleteSentinel()
-
-
-@dataclass
-class ToolRunHandoff:
- handoff: Handoff
- tool_call: ResponseFunctionToolCall
-
-
-@dataclass
-class ToolRunFunction:
- tool_call: ResponseFunctionToolCall
- function_tool: FunctionTool
-
-
-@dataclass
-class ToolRunComputerAction:
- tool_call: ResponseComputerToolCall
- computer_tool: ComputerTool
-
-
-@dataclass
-class ProcessedResponse:
- new_items: list[RunItem]
- handoffs: list[ToolRunHandoff]
- functions: list[ToolRunFunction]
- computer_actions: list[ToolRunComputerAction]
-
- def has_tools_to_run(self) -> bool:
- # Handoffs, functions and computer actions need local processing
- # Hosted tools have already run, so there's nothing to do.
- return any(
- [
- self.handoffs,
- self.functions,
- self.computer_actions,
- ]
- )
-
-
-@dataclass
-class NextStepHandoff:
- new_agent: Agent[Any]
-
-
-@dataclass
-class NextStepFinalOutput:
- output: Any
-
-
-@dataclass
-class NextStepRunAgain:
- pass
-
-
-@dataclass
-class SingleStepResult:
- original_input: str | list[TResponseInputItem]
- """The input items i.e. the items before run() was called. May be mutated by handoff input
- filters."""
-
- model_response: ModelResponse
- """The model response for the current step."""
-
- pre_step_items: list[RunItem]
- """Items generated before the current step."""
-
- new_step_items: list[RunItem]
- """Items generated during this current step."""
-
- next_step: NextStepHandoff | NextStepFinalOutput | NextStepRunAgain
- """The next step to take."""
-
- @property
- def generated_items(self) -> list[RunItem]:
- """Items generated during the agent run (i.e. everything generated after
- `original_input`)."""
- return self.pre_step_items + self.new_step_items
-
-
-def get_model_tracing_impl(
- tracing_disabled: bool, trace_include_sensitive_data: bool
-) -> ModelTracing:
- if tracing_disabled:
- return ModelTracing.DISABLED
- elif trace_include_sensitive_data:
- return ModelTracing.ENABLED
- else:
- return ModelTracing.ENABLED_WITHOUT_DATA
-
-
-class RunImpl:
- @classmethod
- async def execute_tools_and_side_effects(
- cls,
- *,
- agent: Agent[TContext],
- # The original input to the Runner
- original_input: str | list[TResponseInputItem],
- # Everything generated by Runner since the original input, but before the current step
- pre_step_items: list[RunItem],
- new_response: ModelResponse,
- processed_response: ProcessedResponse,
- output_schema: AgentOutputSchema | None,
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- ) -> SingleStepResult:
- # Make a copy of the generated items
- pre_step_items = list(pre_step_items)
-
- new_step_items: list[RunItem] = []
- new_step_items.extend(processed_response.new_items)
-
- # First, let's run the tool calls - function tools and computer actions
- function_results, computer_results = await asyncio.gather(
- cls.execute_function_tool_calls(
- agent=agent,
- tool_runs=processed_response.functions,
- hooks=hooks,
- context_wrapper=context_wrapper,
- config=run_config,
- ),
- cls.execute_computer_actions(
- agent=agent,
- actions=processed_response.computer_actions,
- hooks=hooks,
- context_wrapper=context_wrapper,
- config=run_config,
- ),
- )
- new_step_items.extend(function_results)
- new_step_items.extend(computer_results)
-
- # Second, check if there are any handoffs
- if run_handoffs := processed_response.handoffs:
- return await cls.execute_handoffs(
- agent=agent,
- original_input=original_input,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- new_response=new_response,
- run_handoffs=run_handoffs,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- )
-
- # Now we can check if the model also produced a final output
- message_items = [item for item in new_step_items if isinstance(item, MessageOutputItem)]
-
- # We'll use the last content output as the final output
- potential_final_output_text = (
- ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
- )
-
- # There are two possibilities that lead to a final output:
- # 1. Structured output schema => always leads to a final output
- # 2. Plain text output schema => only leads to a final output if there are no tool calls
- if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
- final_output = output_schema.validate_json(potential_final_output_text)
- return await cls.execute_final_output(
- agent=agent,
- original_input=original_input,
- new_response=new_response,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- final_output=final_output,
- hooks=hooks,
- context_wrapper=context_wrapper,
- )
- elif (
- not output_schema or output_schema.is_plain_text()
- ) and not processed_response.has_tools_to_run():
- return await cls.execute_final_output(
- agent=agent,
- original_input=original_input,
- new_response=new_response,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- final_output=potential_final_output_text or "",
- hooks=hooks,
- context_wrapper=context_wrapper,
- )
- else:
- # If there's no final output, we can just run again
- return SingleStepResult(
- original_input=original_input,
- model_response=new_response,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- next_step=NextStepRunAgain(),
- )
-
- @classmethod
- def process_model_response(
- cls,
- *,
- agent: Agent[Any],
- response: ModelResponse,
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- ) -> ProcessedResponse:
- items: list[RunItem] = []
-
- run_handoffs = []
- functions = []
- computer_actions = []
-
- handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
- function_map = {tool.name: tool for tool in agent.tools if isinstance(tool, FunctionTool)}
- computer_tool = next((tool for tool in agent.tools if isinstance(tool, ComputerTool)), None)
-
- for output in response.output:
- if isinstance(output, ResponseOutputMessage):
- items.append(MessageOutputItem(raw_item=output, agent=agent))
- elif isinstance(output, ResponseFileSearchToolCall):
- items.append(ToolCallItem(raw_item=output, agent=agent))
- elif isinstance(output, ResponseFunctionWebSearch):
- items.append(ToolCallItem(raw_item=output, agent=agent))
- elif isinstance(output, Reasoning):
- items.append(ReasoningItem(raw_item=output, agent=agent))
- elif isinstance(output, ResponseComputerToolCall):
- items.append(ToolCallItem(raw_item=output, agent=agent))
- if not computer_tool:
- _utils.attach_error_to_current_span(
- SpanError(
- message="Computer tool not found",
- data={},
- )
- )
- raise ModelBehaviorError(
- "Model produced computer action without a computer tool."
- )
- computer_actions.append(
- ToolRunComputerAction(tool_call=output, computer_tool=computer_tool)
- )
- elif not isinstance(output, ResponseFunctionToolCall):
- logger.warning(f"Unexpected output type, ignoring: {type(output)}")
- continue
-
- # At this point we know it's a function tool call
- if not isinstance(output, ResponseFunctionToolCall):
- continue
-
- # Handoffs
- if output.name in handoff_map:
- items.append(HandoffCallItem(raw_item=output, agent=agent))
- handoff = ToolRunHandoff(
- tool_call=output,
- handoff=handoff_map[output.name],
- )
- run_handoffs.append(handoff)
- # Regular function tool call
- else:
- if output.name not in function_map:
- _utils.attach_error_to_current_span(
- SpanError(
- message="Tool not found",
- data={"tool_name": output.name},
- )
- )
- raise ModelBehaviorError(f"Tool {output.name} not found in agent {agent.name}")
- items.append(ToolCallItem(raw_item=output, agent=agent))
- functions.append(
- ToolRunFunction(
- tool_call=output,
- function_tool=function_map[output.name],
- )
- )
-
- return ProcessedResponse(
- new_items=items,
- handoffs=run_handoffs,
- functions=functions,
- computer_actions=computer_actions,
- )
-
- @classmethod
- async def execute_function_tool_calls(
- cls,
- *,
- agent: Agent[TContext],
- tool_runs: list[ToolRunFunction],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- config: RunConfig,
- ) -> list[RunItem]:
- async def run_single_tool(
- func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
- ) -> str:
- with function_span(func_tool.name) as span_fn:
- if config.trace_include_sensitive_data:
- span_fn.span_data.input = tool_call.arguments
- try:
- _, _, result = await asyncio.gather(
- hooks.on_tool_start(context_wrapper, agent, func_tool),
- (
- agent.hooks.on_tool_start(context_wrapper, agent, func_tool)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- func_tool.on_invoke_tool(context_wrapper, tool_call.arguments),
- )
-
- await asyncio.gather(
- hooks.on_tool_end(context_wrapper, agent, func_tool, result),
- (
- agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- )
- except Exception as e:
- _utils.attach_error_to_current_span(
- SpanError(
- message="Error running tool",
- data={"tool_name": func_tool.name, "error": str(e)},
- )
- )
- if isinstance(e, AgentsException):
- raise e
- raise UserError(f"Error running tool {func_tool.name}: {e}") from e
-
- if config.trace_include_sensitive_data:
- span_fn.span_data.output = result
- return result
-
- tasks = []
- for tool_run in tool_runs:
- function_tool = tool_run.function_tool
- tasks.append(run_single_tool(function_tool, tool_run.tool_call))
-
- results = await asyncio.gather(*tasks)
-
- return [
- ToolCallOutputItem(
- output=str(result),
- raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
- agent=agent,
- )
- for tool_run, result in zip(tool_runs, results)
- ]
-
- @classmethod
- async def execute_computer_actions(
- cls,
- *,
- agent: Agent[TContext],
- actions: list[ToolRunComputerAction],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- config: RunConfig,
- ) -> list[RunItem]:
- results: list[RunItem] = []
- # Need to run these serially, because each action can affect the computer state
- for action in actions:
- results.append(
- await ComputerAction.execute(
- agent=agent,
- action=action,
- hooks=hooks,
- context_wrapper=context_wrapper,
- config=config,
- )
- )
-
- return results
-
- @classmethod
- async def execute_handoffs(
- cls,
- *,
- agent: Agent[TContext],
- original_input: str | list[TResponseInputItem],
- pre_step_items: list[RunItem],
- new_step_items: list[RunItem],
- new_response: ModelResponse,
- run_handoffs: list[ToolRunHandoff],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- ) -> SingleStepResult:
- # If there is more than one handoff, add tool responses that reject those handoffs
- if len(run_handoffs) > 1:
- output_message = "Multiple handoffs detected, ignoring this one."
- new_step_items.extend(
- [
- ToolCallOutputItem(
- output=output_message,
- raw_item=ItemHelpers.tool_call_output_item(
- handoff.tool_call, output_message
- ),
- agent=agent,
- )
- for handoff in run_handoffs[1:]
- ]
- )
-
- actual_handoff = run_handoffs[0]
- with handoff_span(from_agent=agent.name) as span_handoff:
- handoff = actual_handoff.handoff
- new_agent: Agent[Any] = await handoff.on_invoke_handoff(
- context_wrapper, actual_handoff.tool_call.arguments
- )
- span_handoff.span_data.to_agent = new_agent.name
-
- # Append a tool output item for the handoff
- new_step_items.append(
- HandoffOutputItem(
- agent=agent,
- raw_item=ItemHelpers.tool_call_output_item(
- actual_handoff.tool_call,
- handoff.get_transfer_message(new_agent),
- ),
- source_agent=agent,
- target_agent=new_agent,
- )
- )
-
- # Execute handoff hooks
- await asyncio.gather(
- hooks.on_handoff(
- context=context_wrapper,
- from_agent=agent,
- to_agent=new_agent,
- ),
- (
- agent.hooks.on_handoff(
- context_wrapper,
- agent=new_agent,
- source=agent,
- )
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- )
-
- # If there's an input filter, filter the input for the next agent
- input_filter = handoff.input_filter or (
- run_config.handoff_input_filter if run_config else None
- )
- if input_filter:
- logger.debug("Filtering inputs for handoff")
- handoff_input_data = HandoffInputData(
- input_history=tuple(original_input)
- if isinstance(original_input, list)
- else original_input,
- pre_handoff_items=tuple(pre_step_items),
- new_items=tuple(new_step_items),
- )
- if not callable(input_filter):
- _utils.attach_error_to_span(
- span_handoff,
- SpanError(
- message="Invalid input filter",
- data={"details": "not callable()"},
- ),
- )
- raise UserError(f"Invalid input filter: {input_filter}")
- filtered = input_filter(handoff_input_data)
- if not isinstance(filtered, HandoffInputData):
- _utils.attach_error_to_span(
- span_handoff,
- SpanError(
- message="Invalid input filter result",
- data={"details": "not a HandoffInputData"},
- ),
- )
- raise UserError(f"Invalid input filter result: {filtered}")
-
- original_input = (
- filtered.input_history
- if isinstance(filtered.input_history, str)
- else list(filtered.input_history)
- )
- pre_step_items = list(filtered.pre_handoff_items)
- new_step_items = list(filtered.new_items)
-
- return SingleStepResult(
- original_input=original_input,
- model_response=new_response,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- next_step=NextStepHandoff(new_agent),
- )
-
- @classmethod
- async def execute_final_output(
- cls,
- *,
- agent: Agent[TContext],
- original_input: str | list[TResponseInputItem],
- new_response: ModelResponse,
- pre_step_items: list[RunItem],
- new_step_items: list[RunItem],
- final_output: Any,
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- ) -> SingleStepResult:
- # Run the on_end hooks
- await cls.run_final_output_hooks(agent, hooks, context_wrapper, final_output)
-
- return SingleStepResult(
- original_input=original_input,
- model_response=new_response,
- pre_step_items=pre_step_items,
- new_step_items=new_step_items,
- next_step=NextStepFinalOutput(final_output),
- )
-
- @classmethod
- async def run_final_output_hooks(
- cls,
- agent: Agent[TContext],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- final_output: Any,
- ):
- await asyncio.gather(
- hooks.on_agent_end(context_wrapper, agent, final_output),
- agent.hooks.on_end(context_wrapper, agent, final_output)
- if agent.hooks
- else _utils.noop_coroutine(),
- )
-
- @classmethod
- async def run_single_input_guardrail(
- cls,
- agent: Agent[Any],
- guardrail: InputGuardrail[TContext],
- input: str | list[TResponseInputItem],
- context: RunContextWrapper[TContext],
- ) -> InputGuardrailResult:
- with guardrail_span(guardrail.get_name()) as span_guardrail:
- result = await guardrail.run(agent, input, context)
- span_guardrail.span_data.triggered = result.output.tripwire_triggered
- return result
-
- @classmethod
- async def run_single_output_guardrail(
- cls,
- guardrail: OutputGuardrail[TContext],
- agent: Agent[Any],
- agent_output: Any,
- context: RunContextWrapper[TContext],
- ) -> OutputGuardrailResult:
- with guardrail_span(guardrail.get_name()) as span_guardrail:
- result = await guardrail.run(agent=agent, agent_output=agent_output, context=context)
- span_guardrail.span_data.triggered = result.output.tripwire_triggered
- return result
-
- @classmethod
- def stream_step_result_to_queue(
- cls,
- step_result: SingleStepResult,
- queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel],
- ):
- for item in step_result.new_step_items:
- if isinstance(item, MessageOutputItem):
- event = RunItemStreamEvent(item=item, name="message_output_created")
- elif isinstance(item, HandoffCallItem):
- event = RunItemStreamEvent(item=item, name="handoff_requested")
- elif isinstance(item, HandoffOutputItem):
- event = RunItemStreamEvent(item=item, name="handoff_occured")
- elif isinstance(item, ToolCallItem):
- event = RunItemStreamEvent(item=item, name="tool_called")
- elif isinstance(item, ToolCallOutputItem):
- event = RunItemStreamEvent(item=item, name="tool_output")
- elif isinstance(item, ReasoningItem):
- event = RunItemStreamEvent(item=item, name="reasoning_item_created")
- else:
- logger.warning(f"Unexpected item type: {type(item)}")
- event = None
-
- if event:
- queue.put_nowait(event)
-
-
-class TraceCtxManager:
- """Creates a trace only if there is no current trace, and manages the trace lifecycle."""
-
- def __init__(
- self,
- workflow_name: str,
- trace_id: str | None,
- group_id: str | None,
- metadata: dict[str, Any] | None,
- disabled: bool,
- ):
- self.trace: Trace | None = None
- self.workflow_name = workflow_name
- self.trace_id = trace_id
- self.group_id = group_id
- self.metadata = metadata
- self.disabled = disabled
-
- def __enter__(self) -> TraceCtxManager:
- current_trace = get_current_trace()
- if not current_trace:
- self.trace = trace(
- workflow_name=self.workflow_name,
- trace_id=self.trace_id,
- group_id=self.group_id,
- metadata=self.metadata,
- disabled=self.disabled,
- )
- self.trace.start(mark_as_current=True)
-
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- if self.trace:
- self.trace.finish(reset_current=True)
-
-
-class ComputerAction:
- @classmethod
- async def execute(
- cls,
- *,
- agent: Agent[TContext],
- action: ToolRunComputerAction,
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- config: RunConfig,
- ) -> RunItem:
- output_func = (
- cls._get_screenshot_async(action.computer_tool.computer, action.tool_call)
- if isinstance(action.computer_tool.computer, AsyncComputer)
- else cls._get_screenshot_sync(action.computer_tool.computer, action.tool_call)
- )
-
- _, _, output = await asyncio.gather(
- hooks.on_tool_start(context_wrapper, agent, action.computer_tool),
- (
- agent.hooks.on_tool_start(context_wrapper, agent, action.computer_tool)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- output_func,
- )
-
- await asyncio.gather(
- hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output),
- (
- agent.hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- )
-
- # TODO: don't send a screenshot every single time, use references
- image_url = f"data:image/png;base64,{output}"
- return ToolCallOutputItem(
- agent=agent,
- output=image_url,
- raw_item=ComputerCallOutput(
- call_id=action.tool_call.call_id,
- output={
- "type": "computer_screenshot",
- "image_url": image_url,
- },
- type="computer_call_output",
- ),
- )
-
- @classmethod
- async def _get_screenshot_sync(
- cls,
- computer: Computer,
- tool_call: ResponseComputerToolCall,
- ) -> str:
- action = tool_call.action
- if isinstance(action, ActionClick):
- computer.click(action.x, action.y, action.button)
- elif isinstance(action, ActionDoubleClick):
- computer.double_click(action.x, action.y)
- elif isinstance(action, ActionDrag):
- computer.drag([(p.x, p.y) for p in action.path])
- elif isinstance(action, ActionKeypress):
- computer.keypress(action.keys)
- elif isinstance(action, ActionMove):
- computer.move(action.x, action.y)
- elif isinstance(action, ActionScreenshot):
- computer.screenshot()
- elif isinstance(action, ActionScroll):
- computer.scroll(action.x, action.y, action.scroll_x, action.scroll_y)
- elif isinstance(action, ActionType):
- computer.type(action.text)
- elif isinstance(action, ActionWait):
- computer.wait()
-
- return computer.screenshot()
-
- @classmethod
- async def _get_screenshot_async(
- cls,
- computer: AsyncComputer,
- tool_call: ResponseComputerToolCall,
- ) -> str:
- action = tool_call.action
- if isinstance(action, ActionClick):
- await computer.click(action.x, action.y, action.button)
- elif isinstance(action, ActionDoubleClick):
- await computer.double_click(action.x, action.y)
- elif isinstance(action, ActionDrag):
- await computer.drag([(p.x, p.y) for p in action.path])
- elif isinstance(action, ActionKeypress):
- await computer.keypress(action.keys)
- elif isinstance(action, ActionMove):
- await computer.move(action.x, action.y)
- elif isinstance(action, ActionScreenshot):
- await computer.screenshot()
- elif isinstance(action, ActionScroll):
- await computer.scroll(action.x, action.y, action.scroll_x, action.scroll_y)
- elif isinstance(action, ActionType):
- await computer.type(action.text)
- elif isinstance(action, ActionWait):
- await computer.wait()
-
- return await computer.screenshot()
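
`get_model_tracing_impl` above folds two booleans into the three tracing modes: disabling wins outright, and otherwise the sensitive-data switch picks between the full and data-free variants. A sketch of the mapping, assuming the internal module is importable as `agents._run_impl`:

from agents import ModelTracing
from agents._run_impl import get_model_tracing_impl  # internal import path; an assumption

# tracing_disabled takes precedence over trace_include_sensitive_data.
assert get_model_tracing_impl(True, True) is ModelTracing.DISABLED
assert get_model_tracing_impl(True, False) is ModelTracing.DISABLED
assert get_model_tracing_impl(False, True) is ModelTracing.ENABLED
assert get_model_tracing_impl(False, False) is ModelTracing.ENABLED_WITHOUT_DATA
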
diff --git a/tests/src/agents/_utils.py b/tests/src/agents/_utils.py
deleted file mode 100644
index 2a0293a6..00000000
--- a/tests/src/agents/_utils.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import annotations
-
-import re
-from collections.abc import Awaitable
-from typing import Any, Literal, Union
-
-from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeVar
-
-from .exceptions import ModelBehaviorError
-from .logger import logger
-from .tracing import Span, SpanError, get_current_span
-
-T = TypeVar("T")
-
-MaybeAwaitable = Union[Awaitable[T], T]
-
-
-def transform_string_function_style(name: str) -> str:
- # Replace spaces with underscores
- name = name.replace(" ", "_")
-
- # Replace non-alphanumeric characters with underscores
- name = re.sub(r"[^a-zA-Z0-9]", "_", name)
-
- return name.lower()
-
-
-def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) -> T:
- partial_setting: bool | Literal["off", "on", "trailing-strings"] = (
- "trailing-strings" if partial else False
- )
- try:
- validated = type_adapter.validate_json(json_str, experimental_allow_partial=partial_setting)
- return validated
- except ValidationError as e:
- attach_error_to_current_span(
- SpanError(
- message="Invalid JSON provided",
- data={},
- )
- )
- raise ModelBehaviorError(
- f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}"
- ) from e
-
-
-def attach_error_to_span(span: Span[Any], error: SpanError) -> None:
- span.set_error(error)
-
-
-def attach_error_to_current_span(error: SpanError) -> None:
- span = get_current_span()
- if span:
- attach_error_to_span(span, error)
- else:
- logger.warning(f"No span to add error {error} to")
-
-
-async def noop_coroutine() -> None:
- pass
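
`validate_json` is a thin wrapper over pydantic's `TypeAdapter`, and `transform_string_function_style` normalizes arbitrary names into tool-safe identifiers. A quick sketch of both, assuming `agents._utils` is importable:

from pydantic import TypeAdapter

from agents._utils import transform_string_function_style, validate_json  # internal path; an assumption

adapter = TypeAdapter(list[int])
assert validate_json("[1, 2, 3]", adapter, partial=False) == [1, 2, 3]

# Spaces and punctuation collapse to underscores, then everything lowercases.
assert transform_string_function_style("Fetch User Data!") == "fetch_user_data_"
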
diff --git a/tests/src/agents/agent.py b/tests/src/agents/agent.py
deleted file mode 100644
index 61c0a896..00000000
--- a/tests/src/agents/agent.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from __future__ import annotations
-
-import dataclasses
-import inspect
-from collections.abc import Awaitable
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Callable, Generic, cast
-
-from . import _utils
-from ._utils import MaybeAwaitable
-from .guardrail import InputGuardrail, OutputGuardrail
-from .handoffs import Handoff
-from .items import ItemHelpers
-from .logger import logger
-from .model_settings import ModelSettings
-from .models.interface import Model
-from .run_context import RunContextWrapper, TContext
-from .tool import Tool, function_tool
-
-if TYPE_CHECKING:
- from .lifecycle import AgentHooks
- from .result import RunResult
-
-
-@dataclass
-class Agent(Generic[TContext]):
- """An agent is an AI model configured with instructions, tools, guardrails, handoffs and more.
-
- We strongly recommend passing `instructions`, which is the "system prompt" for the agent. In
- addition, you can pass `handoff_description`, which is a human-readable description of the
- agent, used when the agent is used inside tools/handoffs.
-
- Agents are generic on the context type. The context is a (mutable) object you create. It is
- passed to tool functions, handoffs, guardrails, etc.
- """
-
- name: str
- """The name of the agent."""
-
- instructions: (
- str
- | Callable[
- [RunContextWrapper[TContext], Agent[TContext]],
- MaybeAwaitable[str],
- ]
- | None
- ) = None
- """The instructions for the agent. Will be used as the "system prompt" when this agent is
- invoked. Describes what the agent should do, and how it responds.
-
- Can either be a string, or a function that dynamically generates instructions for the agent. If
- you provide a function, it will be called with the context and the agent instance. It must
- return a string.
- """
-
- handoff_description: str | None = None
- """A description of the agent. This is used when the agent is used as a handoff, so that an
- LLM knows what it does and when to invoke it.
- """
-
- handoffs: list[Agent[Any] | Handoff[TContext]] = field(default_factory=list)
- """Handoffs are sub-agents that the agent can delegate to. You can provide a list of handoffs,
- and the agent can choose to delegate to them if relevant. Allows for separation of concerns and
- modularity.
- """
-
- model: str | Model | None = None
- """The model implementation to use when invoking the LLM.
-
- By default, if not set, the agent will use the default model configured in
- `model_settings.DEFAULT_MODEL`.
- """
-
- model_settings: ModelSettings = field(default_factory=ModelSettings)
- """Configures model-specific tuning parameters (e.g. temperature, top_p).
- """
-
- tools: list[Tool] = field(default_factory=list)
- """A list of tools that the agent can use."""
-
- input_guardrails: list[InputGuardrail[TContext]] = field(default_factory=list)
- """A list of checks that run in parallel to the agent's execution, before generating a
- response. Runs only if the agent is the first agent in the chain.
- """
-
- output_guardrails: list[OutputGuardrail[TContext]] = field(default_factory=list)
- """A list of checks that run on the final output of the agent, after generating a response.
- Runs only if the agent produces a final output.
- """
-
- output_type: type[Any] | None = None
- """The type of the output object. If not provided, the output will be `str`."""
-
- hooks: AgentHooks[TContext] | None = None
- """A class that receives callbacks on various lifecycle events for this agent.
- """
-
- def clone(self, **kwargs: Any) -> Agent[TContext]:
- """Make a copy of the agent, with the given arguments changed. For example, you could do:
- ```
- new_agent = agent.clone(instructions="New instructions")
- ```
- """
- return dataclasses.replace(self, **kwargs)
-
- def as_tool(
- self,
- tool_name: str | None,
- tool_description: str | None,
- custom_output_extractor: Callable[[RunResult], Awaitable[str]] | None = None,
- ) -> Tool:
- """Transform this agent into a tool, callable by other agents.
-
- This is different from handoffs in two ways:
- 1. In handoffs, the new agent receives the conversation history. In this tool, the new agent
- receives generated input.
- 2. In handoffs, the new agent takes over the conversation. In this tool, the new agent is
- called as a tool, and the conversation is continued by the original agent.
-
- Args:
- tool_name: The name of the tool. If not provided, the agent's name will be used.
- tool_description: The description of the tool, which should indicate what it does and
- when to use it.
- custom_output_extractor: A function that extracts the output from the agent. If not
- provided, the last message from the agent will be used.
- """
-
- @function_tool(
- name_override=tool_name or _utils.transform_string_function_style(self.name),
- description_override=tool_description or "",
- )
- async def run_agent(context: RunContextWrapper, input: str) -> str:
- from .run import Runner
-
- output = await Runner.run(
- starting_agent=self,
- input=input,
- context=context.context,
- )
- if custom_output_extractor:
- return await custom_output_extractor(output)
-
- return ItemHelpers.text_message_outputs(output.new_items)
-
- return run_agent
-
- async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> str | None:
- """Get the system prompt for the agent."""
- if isinstance(self.instructions, str):
- return self.instructions
- elif callable(self.instructions):
- if inspect.iscoroutinefunction(self.instructions):
- return await cast(Awaitable[str], self.instructions(run_context, self))
- else:
- return cast(str, self.instructions(run_context, self))
- elif self.instructions is not None:
- logger.error(f"Instructions must be a string or a function, got {self.instructions}")
-
- return None
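
`clone()` is a thin `dataclasses.replace`, so fields you don't override are shared with the original instance. A small sketch (agent names and instructions are illustrative):

from agents import Agent

base = Agent(name="triage", instructions="Route the user to the right team.")
billing = base.clone(name="billing", instructions="Answer billing questions.")

# Overridden fields differ; untouched fields are the very same objects.
assert billing.name == "billing"
assert billing.model_settings is base.model_settings
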
diff --git a/tests/src/agents/agent_output.py b/tests/src/agents/agent_output.py
deleted file mode 100644
index 8140d8c6..00000000
--- a/tests/src/agents/agent_output.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from dataclasses import dataclass
-from typing import Any
-
-from pydantic import BaseModel, TypeAdapter
-from typing_extensions import TypedDict, get_args, get_origin
-
-from . import _utils
-from .exceptions import ModelBehaviorError, UserError
-from .strict_schema import ensure_strict_json_schema
-from .tracing import SpanError
-
-_WRAPPER_DICT_KEY = "response"
-
-
-@dataclass(init=False)
-class AgentOutputSchema:
- """An object that captures the JSON schema of the output, as well as validating/parsing JSON
- produced by the LLM into the output type.
- """
-
- output_type: type[Any]
- """The type of the output."""
-
- _type_adapter: TypeAdapter[Any]
- """A type adapter that wraps the output type, so that we can validate JSON."""
-
- _is_wrapped: bool
- """Whether the output type is wrapped in a dictionary. This is generally done if the base
- output type cannot be represented as a JSON Schema object.
- """
-
- _output_schema: dict[str, Any]
- """The JSON schema of the output."""
-
- strict_json_schema: bool
- """Whether the JSON schema is in strict mode. We **strongly** recommend setting this to True,
- as it increases the likelihood of correct JSON input.
- """
-
- def __init__(self, output_type: type[Any], strict_json_schema: bool = True):
- """
- Args:
- output_type: The type of the output.
- strict_json_schema: Whether the JSON schema is in strict mode. We **strongly** recommend
- setting this to True, as it increases the likelihood of correct JSON input.
- """
- self.output_type = output_type
- self.strict_json_schema = strict_json_schema
-
- if output_type is None or output_type is str:
- self._is_wrapped = False
- self._type_adapter = TypeAdapter(output_type)
- self._output_schema = self._type_adapter.json_schema()
- return
-
- # We should wrap for things that are not plain text, and for things that would definitely
- # not be a JSON Schema object.
- self._is_wrapped = not _is_subclass_of_base_model_or_dict(output_type)
-
- if self._is_wrapped:
- OutputType = TypedDict(
- "OutputType",
- {
- _WRAPPER_DICT_KEY: output_type, # type: ignore
- },
- )
- self._type_adapter = TypeAdapter(OutputType)
- self._output_schema = self._type_adapter.json_schema()
- else:
- self._type_adapter = TypeAdapter(output_type)
- self._output_schema = self._type_adapter.json_schema()
-
- if self.strict_json_schema:
- self._output_schema = ensure_strict_json_schema(self._output_schema)
-
- def is_plain_text(self) -> bool:
- """Whether the output type is plain text (versus a JSON object)."""
- return self.output_type is None or self.output_type is str
-
- def json_schema(self) -> dict[str, Any]:
- """The JSON schema of the output type."""
- if self.is_plain_text():
- raise UserError("Output type is plain text, so no JSON schema is available")
- return self._output_schema
-
- def validate_json(self, json_str: str, partial: bool = False) -> Any:
- """Validate a JSON string against the output type. Returns the validated object, or raises
- a `ModelBehaviorError` if the JSON is invalid.
- """
- validated = _utils.validate_json(json_str, self._type_adapter, partial)
- if self._is_wrapped:
- if not isinstance(validated, dict):
- _utils.attach_error_to_current_span(
- SpanError(
- message="Invalid JSON",
- data={"details": f"Expected a dict, got {type(validated)}"},
- )
- )
- raise ModelBehaviorError(
- f"Expected a dict, got {type(validated)} for JSON: {json_str}"
- )
-
- if _WRAPPER_DICT_KEY not in validated:
- _utils.attach_error_to_current_span(
- SpanError(
- message="Invalid JSON",
- data={"details": f"Could not find key {_WRAPPER_DICT_KEY} in JSON"},
- )
- )
- raise ModelBehaviorError(
- f"Could not find key {_WRAPPER_DICT_KEY} in JSON: {json_str}"
- )
- return validated[_WRAPPER_DICT_KEY]
- return validated
-
- def output_type_name(self) -> str:
- """The name of the output type."""
- return _type_to_str(self.output_type)
-
-
-def _is_subclass_of_base_model_or_dict(t: Any) -> bool:
- if not isinstance(t, type):
- return False
-
- # If it's a generic alias, 'origin' will be the actual type, e.g. 'list'
- origin = get_origin(t)
-
- allowed_types = (BaseModel, dict)
- # If it's a generic alias e.g. list[str], then we should check the origin type i.e. list
- return issubclass(origin or t, allowed_types)
-
-
-def _type_to_str(t: type[Any]) -> str:
- origin = get_origin(t)
- args = get_args(t)
-
- if origin is None:
- # It's a simple type like `str`, `int`, etc.
- return t.__name__
- elif args:
- args_str = ', '.join(_type_to_str(arg) for arg in args)
- return f"{origin.__name__}[{args_str}]"
- else:
- return str(t)
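
Types that are not `BaseModel` or `dict` subclasses get wrapped under a `"response"` key so they can be expressed as a JSON Schema object, and `validate_json` unwraps them transparently. A sketch of that round trip:

from agents import AgentOutputSchema

schema = AgentOutputSchema(list[str])
assert not schema.is_plain_text()
# list[str] is not an object type, so the schema wraps it under "response"
# and validate_json unwraps it again.
assert schema.validate_json('{"response": ["a", "b"]}') == ["a", "b"]
assert schema.output_type_name() == "list[str]"
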
diff --git a/tests/src/agents/computer.py b/tests/src/agents/computer.py
deleted file mode 100644
index 1b9224d5..00000000
--- a/tests/src/agents/computer.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import abc
-from typing import Literal
-
-Environment = Literal["mac", "windows", "ubuntu", "browser"]
-Button = Literal["left", "right", "wheel", "back", "forward"]
-
-
-class Computer(abc.ABC):
- """A computer implemented with sync operations. The Computer interface abstracts the
- operations needed to control a computer or browser."""
-
- @property
- @abc.abstractmethod
- def environment(self) -> Environment:
- pass
-
- @property
- @abc.abstractmethod
- def dimensions(self) -> tuple[int, int]:
- pass
-
- @abc.abstractmethod
- def screenshot(self) -> str:
- pass
-
- @abc.abstractmethod
- def click(self, x: int, y: int, button: Button) -> None:
- pass
-
- @abc.abstractmethod
- def double_click(self, x: int, y: int) -> None:
- pass
-
- @abc.abstractmethod
- def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
- pass
-
- @abc.abstractmethod
- def type(self, text: str) -> None:
- pass
-
- @abc.abstractmethod
- def wait(self) -> None:
- pass
-
- @abc.abstractmethod
- def move(self, x: int, y: int) -> None:
- pass
-
- @abc.abstractmethod
- def keypress(self, keys: list[str]) -> None:
- pass
-
- @abc.abstractmethod
- def drag(self, path: list[tuple[int, int]]) -> None:
- pass
-
-
-class AsyncComputer(abc.ABC):
- """A computer implemented with async operations. The Computer interface abstracts the
- operations needed to control a computer or browser."""
-
- @property
- @abc.abstractmethod
- def environment(self) -> Environment:
- pass
-
- @property
- @abc.abstractmethod
- def dimensions(self) -> tuple[int, int]:
- pass
-
- @abc.abstractmethod
- async def screenshot(self) -> str:
- pass
-
- @abc.abstractmethod
- async def click(self, x: int, y: int, button: Button) -> None:
- pass
-
- @abc.abstractmethod
- async def double_click(self, x: int, y: int) -> None:
- pass
-
- @abc.abstractmethod
- async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
- pass
-
- @abc.abstractmethod
- async def type(self, text: str) -> None:
- pass
-
- @abc.abstractmethod
- async def wait(self) -> None:
- pass
-
- @abc.abstractmethod
- async def move(self, x: int, y: int) -> None:
- pass
-
- @abc.abstractmethod
- async def keypress(self, keys: list[str]) -> None:
- pass
-
- @abc.abstractmethod
- async def drag(self, path: list[tuple[int, int]]) -> None:
- pass
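
Implementing the interface means filling in every abstract method; a do-nothing implementation only needs to return something sensible. A minimal stub satisfying the sync `Computer` contract (the class itself is illustrative):

from agents import Button, Computer, Environment


class NoOpComputer(Computer):
    """Illustrative stand-in that satisfies the abstract interface."""

    @property
    def environment(self) -> Environment:
        return "browser"

    @property
    def dimensions(self) -> tuple[int, int]:
        return (1024, 768)

    def screenshot(self) -> str:
        return ""  # a real implementation returns a base64-encoded PNG

    def click(self, x: int, y: int, button: Button) -> None: ...
    def double_click(self, x: int, y: int) -> None: ...
    def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: ...
    def type(self, text: str) -> None: ...
    def wait(self) -> None: ...
    def move(self, x: int, y: int) -> None: ...
    def keypress(self, keys: list[str]) -> None: ...
    def drag(self, path: list[tuple[int, int]]) -> None: ...


assert NoOpComputer().dimensions == (1024, 768)
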
diff --git a/tests/src/agents/exceptions.py b/tests/src/agents/exceptions.py
deleted file mode 100644
index 78898f01..00000000
--- a/tests/src/agents/exceptions.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
- from .guardrail import InputGuardrailResult, OutputGuardrailResult
-
-
-class AgentsException(Exception):
- """Base class for all exceptions in the Agents SDK."""
-
-
-class MaxTurnsExceeded(AgentsException):
- """Exception raised when the maximum number of turns is exceeded."""
-
- message: str
-
- def __init__(self, message: str):
- self.message = message
-
-
-class ModelBehaviorError(AgentsException):
- """Exception raised when the model does something unexpected, e.g. calling a tool that doesn't
- exist, or providing malformed JSON.
- """
-
- message: str
-
- def __init__(self, message: str):
- self.message = message
-
-
-class UserError(AgentsException):
- """Exception raised when the user makes an error using the SDK."""
-
- message: str
-
- def __init__(self, message: str):
- self.message = message
-
-
-class InputGuardrailTripwireTriggered(AgentsException):
- """Exception raised when a guardrail tripwire is triggered."""
-
- guardrail_result: "InputGuardrailResult"
- """The result data of the guardrail that was triggered."""
-
- def __init__(self, guardrail_result: "InputGuardrailResult"):
- self.guardrail_result = guardrail_result
- super().__init__(
- f"Guardrail {guardrail_result.guardrail.__class__.__name__} triggered tripwire"
- )
-
-
-class OutputGuardrailTripwireTriggered(AgentsException):
- """Exception raised when a guardrail tripwire is triggered."""
-
- guardrail_result: "OutputGuardrailResult"
- """The result data of the guardrail that was triggered."""
-
- def __init__(self, guardrail_result: "OutputGuardrailResult"):
- self.guardrail_result = guardrail_result
- super().__init__(
- f"Guardrail {guardrail_result.guardrail.__class__.__name__} triggered tripwire"
- )
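
Because every SDK error derives from `AgentsException`, a single `except` clause can catch the lot while still branching on the concrete type. A brief sketch:

from agents import AgentsException, MaxTurnsExceeded

try:
    raise MaxTurnsExceeded("Max turns (10) exceeded")
except AgentsException as exc:
    # One handler covers guardrail trips, model errors, and user errors alike.
    print(type(exc).__name__, getattr(exc, "message", ""))
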
diff --git a/tests/src/agents/extensions/__init__.py b/tests/src/agents/extensions/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/src/agents/extensions/handoff_filters.py b/tests/src/agents/extensions/handoff_filters.py
deleted file mode 100644
index f4f9b8bf..00000000
--- a/tests/src/agents/extensions/handoff_filters.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-from ..handoffs import HandoffInputData
-from ..items import (
- HandoffCallItem,
- HandoffOutputItem,
- RunItem,
- ToolCallItem,
- ToolCallOutputItem,
- TResponseInputItem,
-)
-
-"""Contains common handoff input filters, for convenience. """
-
-
-def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData:
- """Filters out all tool items: file search, web search and function calls+output."""
-
- history = handoff_input_data.input_history
- new_items = handoff_input_data.new_items
-
- filtered_history = (
- _remove_tool_types_from_input(history) if isinstance(history, tuple) else history
- )
- filtered_pre_handoff_items = _remove_tools_from_items(handoff_input_data.pre_handoff_items)
- filtered_new_items = _remove_tools_from_items(new_items)
-
- return HandoffInputData(
- input_history=filtered_history,
- pre_handoff_items=filtered_pre_handoff_items,
- new_items=filtered_new_items,
- )
-
-
-def _remove_tools_from_items(items: tuple[RunItem, ...]) -> tuple[RunItem, ...]:
- filtered_items = []
- for item in items:
- if (
- isinstance(item, HandoffCallItem)
- or isinstance(item, HandoffOutputItem)
- or isinstance(item, ToolCallItem)
- or isinstance(item, ToolCallOutputItem)
- ):
- continue
- filtered_items.append(item)
- return tuple(filtered_items)
-
-
-def _remove_tool_types_from_input(
- items: tuple[TResponseInputItem, ...],
-) -> tuple[TResponseInputItem, ...]:
- tool_types = [
- "function_call",
- "function_call_output",
- "computer_call",
- "computer_call_output",
- "file_search_call",
- "web_search_call",
- ]
-
- filtered_items: list[TResponseInputItem] = []
- for item in items:
- itype = item.get("type")
- if itype in tool_types:
- continue
- filtered_items.append(item)
- return tuple(filtered_items)
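
`remove_all_tools` drops tool-related items from all three parts of the handoff payload but leaves plain messages untouched. A sketch, assuming the dict shapes below are valid input items:

from agents import HandoffInputData
from agents.extensions.handoff_filters import remove_all_tools  # module path per the deleted file

data = HandoffInputData(
    input_history=(
        {"role": "user", "content": "hi", "type": "message"},  # assumed item shape
        {"type": "function_call_output", "call_id": "c1", "output": "42"},
    ),
    pre_handoff_items=(),
    new_items=(),
)
filtered = remove_all_tools(data)
# The function-call output is stripped; the plain message survives.
assert len(filtered.input_history) == 1
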
diff --git a/tests/src/agents/extensions/handoff_prompt.py b/tests/src/agents/extensions/handoff_prompt.py
deleted file mode 100644
index cfb5ca7e..00000000
--- a/tests/src/agents/extensions/handoff_prompt.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# A recommended prompt prefix for agents that use handoffs. We recommend including this or
-# similar instructions in any agents that use handoffs.
-RECOMMENDED_PROMPT_PREFIX = (
- "# System context\n"
- "You are part of a multi-agent system called the Agents SDK, designed to make agent "
- "coordination and execution easy. Agents uses two primary abstraction: **Agents** and "
- "**Handoffs**. An agent encompasses instructions and tools and can hand off a "
- "conversation to another agent when appropriate. "
- "Handoffs are achieved by calling a handoff function, generally named "
- "`transfer_to_`. Transfers between agents are handled seamlessly in the background;"
- " do not mention or draw attention to these transfers in your conversation with the user.\n"
-)
-
-
-def prompt_with_handoff_instructions(prompt: str) -> str:
- """
- Add recommended instructions to the prompt for agents that use handoffs.
- """
- return f"{RECOMMENDED_PROMPT_PREFIX}\n\n{prompt}"
diff --git a/tests/src/agents/function_schema.py b/tests/src/agents/function_schema.py
deleted file mode 100644
index a4b57672..00000000
--- a/tests/src/agents/function_schema.py
+++ /dev/null
@@ -1,340 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-import inspect
-import logging
-import re
-from dataclasses import dataclass
-from typing import Any, Callable, Literal, get_args, get_origin, get_type_hints
-
-from griffe import Docstring, DocstringSectionKind
-from pydantic import BaseModel, Field, create_model
-
-from .exceptions import UserError
-from .run_context import RunContextWrapper
-from .strict_schema import ensure_strict_json_schema
-
-
-@dataclass
-class FuncSchema:
- """
- Captures the schema for a python function, in preparation for sending it to an LLM as a tool.
- """
-
- name: str
- """The name of the function."""
- description: str | None
- """The description of the function."""
- params_pydantic_model: type[BaseModel]
- """A Pydantic model that represents the function's parameters."""
- params_json_schema: dict[str, Any]
- """The JSON schema for the function's parameters, derived from the Pydantic model."""
- signature: inspect.Signature
- """The signature of the function."""
- takes_context: bool = False
- """Whether the function takes a RunContextWrapper argument (must be the first argument)."""
-
- def to_call_args(self, data: BaseModel) -> tuple[list[Any], dict[str, Any]]:
- """
- Converts validated data from the Pydantic model into (args, kwargs), suitable for calling
- the original function.
- """
- positional_args: list[Any] = []
- keyword_args: dict[str, Any] = {}
- seen_var_positional = False
-
- # Use enumerate() so we can skip the first parameter if it's context.
- for idx, (name, param) in enumerate(self.signature.parameters.items()):
- # If the function takes a RunContextWrapper and this is the first parameter, skip it.
- if self.takes_context and idx == 0:
- continue
-
- value = getattr(data, name, None)
- if param.kind == param.VAR_POSITIONAL:
- # e.g. *args: extend positional args and mark that *args is now seen
- positional_args.extend(value or [])
- seen_var_positional = True
- elif param.kind == param.VAR_KEYWORD:
- # e.g. **kwargs handling
- keyword_args.update(value or {})
- elif param.kind in (param.POSITIONAL_ONLY, param.POSITIONAL_OR_KEYWORD):
- # Before *args, add to positional args. After *args, add to keyword args.
- if not seen_var_positional:
- positional_args.append(value)
- else:
- keyword_args[name] = value
- else:
- # For KEYWORD_ONLY parameters, always use keyword args.
- keyword_args[name] = value
- return positional_args, keyword_args
-
-
-@dataclass
-class FuncDocumentation:
- """Contains metadata about a python function, extracted from its docstring."""
-
- name: str
- """The name of the function, via `__name__`."""
- description: str | None
- """The description of the function, derived from the docstring."""
- param_descriptions: dict[str, str] | None
- """The parameter descriptions of the function, derived from the docstring."""
-
-
-DocstringStyle = Literal["google", "numpy", "sphinx"]
-
-
-# As of Feb 2025, the automatic style detection in griffe is an Insiders feature. This
-# code approximates it.
-def _detect_docstring_style(doc: str) -> DocstringStyle:
- scores: dict[DocstringStyle, int] = {"sphinx": 0, "numpy": 0, "google": 0}
-
- # Sphinx style detection: look for :param, :type, :return:, and :rtype:
- sphinx_patterns = [r"^:param\s", r"^:type\s", r"^:return:", r"^:rtype:"]
- for pattern in sphinx_patterns:
- if re.search(pattern, doc, re.MULTILINE):
- scores["sphinx"] += 1
-
- # Numpy style detection: look for headers like 'Parameters', 'Returns', or 'Yields' followed by
- # a dashed underline
- numpy_patterns = [
- r"^Parameters\s*\n\s*-{3,}",
- r"^Returns\s*\n\s*-{3,}",
- r"^Yields\s*\n\s*-{3,}",
- ]
- for pattern in numpy_patterns:
- if re.search(pattern, doc, re.MULTILINE):
- scores["numpy"] += 1
-
- # Google style detection: look for section headers with a trailing colon
- google_patterns = [r"^(Args|Arguments):", r"^(Returns):", r"^(Raises):"]
- for pattern in google_patterns:
- if re.search(pattern, doc, re.MULTILINE):
- scores["google"] += 1
-
- max_score = max(scores.values())
- if max_score == 0:
- return "google"
-
- # Priority order: sphinx > numpy > google in case of tie
- styles: list[DocstringStyle] = ["sphinx", "numpy", "google"]
-
- for style in styles:
- if scores[style] == max_score:
- return style
-
- return "google"
-
-
-@contextlib.contextmanager
-def _suppress_griffe_logging():
- # Suppresses warnings about missing annotations for params
- logger = logging.getLogger("griffe")
- previous_level = logger.getEffectiveLevel()
- logger.setLevel(logging.ERROR)
- try:
- yield
- finally:
- logger.setLevel(previous_level)
-
-
-def generate_func_documentation(
- func: Callable[..., Any], style: DocstringStyle | None = None
-) -> FuncDocumentation:
- """
- Extracts metadata from a function docstring, in preparation for sending it to an LLM as a tool.
-
- Args:
- func: The function to extract documentation from.
- style: The style of the docstring to use for parsing. If not provided, we will attempt to
- auto-detect the style.
-
- Returns:
- A FuncDocumentation object containing the function's name, description, and parameter
- descriptions.
- """
- name = func.__name__
- doc = inspect.getdoc(func)
- if not doc:
- return FuncDocumentation(name=name, description=None, param_descriptions=None)
-
- with _suppress_griffe_logging():
- docstring = Docstring(doc, lineno=1, parser=style or _detect_docstring_style(doc))
- parsed = docstring.parse()
-
- description: str | None = next(
- (section.value for section in parsed if section.kind == DocstringSectionKind.text), None
- )
-
- param_descriptions: dict[str, str] = {
- param.name: param.description
- for section in parsed
- if section.kind == DocstringSectionKind.parameters
- for param in section.value
- }
-
- return FuncDocumentation(
- name=func.__name__,
- description=description,
- param_descriptions=param_descriptions or None,
- )
-
-
-def function_schema(
- func: Callable[..., Any],
- docstring_style: DocstringStyle | None = None,
- name_override: str | None = None,
- description_override: str | None = None,
- use_docstring_info: bool = True,
- strict_json_schema: bool = True,
-) -> FuncSchema:
- """
- Given a python function, extracts a `FuncSchema` from it, capturing the name, description,
- parameter descriptions, and other metadata.
-
- Args:
- func: The function to extract the schema from.
- docstring_style: The style of the docstring to use for parsing. If not provided, we will
- attempt to auto-detect the style.
- name_override: If provided, use this name instead of the function's `__name__`.
- description_override: If provided, use this description instead of the one derived from the
- docstring.
- use_docstring_info: If True, uses the docstring to generate the description and parameter
- descriptions.
- strict_json_schema: Whether the JSON schema is in strict mode. If True, we'll ensure that
- the schema adheres to the "strict" standard the OpenAI API expects. We **strongly**
- recommend setting this to True, as it increases the likelihood of the LLM providing
- correct JSON input.
-
- Returns:
- A `FuncSchema` object containing the function's name, description, parameter descriptions,
- and other metadata.
- """
-
- # 1. Grab docstring info
- if use_docstring_info:
- doc_info = generate_func_documentation(func, docstring_style)
- param_descs = doc_info.param_descriptions or {}
- else:
- doc_info = None
- param_descs = {}
-
- func_name = name_override or (doc_info.name if doc_info else func.__name__)
-
- # 2. Inspect function signature and get type hints
- sig = inspect.signature(func)
- type_hints = get_type_hints(func)
- params = list(sig.parameters.items())
- takes_context = False
- filtered_params = []
-
- if params:
- first_name, first_param = params[0]
- # Prefer the evaluated type hint if available
- ann = type_hints.get(first_name, first_param.annotation)
- if ann != inspect._empty:
- origin = get_origin(ann) or ann
- if origin is RunContextWrapper:
- takes_context = True # Mark that the function takes context
- else:
- filtered_params.append((first_name, first_param))
- else:
- filtered_params.append((first_name, first_param))
-
- # For parameters other than the first, raise error if any use RunContextWrapper.
- for name, param in params[1:]:
- ann = type_hints.get(name, param.annotation)
- if ann != inspect._empty:
- origin = get_origin(ann) or ann
- if origin is RunContextWrapper:
- raise UserError(
- f"RunContextWrapper param found at non-first position in function"
- f" {func.__name__}"
- )
- filtered_params.append((name, param))
-
- # We will collect field definitions for create_model as a dict:
- # field_name -> (type_annotation, default_value_or_Field(...))
- fields: dict[str, Any] = {}
-
- for name, param in filtered_params:
- ann = type_hints.get(name, param.annotation)
- default = param.default
-
- # If there's no type hint, assume `Any`
- if ann == inspect._empty:
- ann = Any
-
- # If a docstring param description exists, use it
- field_description = param_descs.get(name, None)
-
- # Handle different parameter kinds
- if param.kind == param.VAR_POSITIONAL:
- # e.g. *args: extend positional args
- if get_origin(ann) is tuple:
- # e.g. def foo(*args: tuple[int, ...]) -> treat as List[int]
- args_of_tuple = get_args(ann)
- if len(args_of_tuple) == 2 and args_of_tuple[1] is Ellipsis:
- ann = list[args_of_tuple[0]] # type: ignore
- else:
- ann = list[Any]
- else:
- # If user wrote *args: int, treat as List[int]
- ann = list[ann] # type: ignore
-
- # Default factory to empty list
- fields[name] = (
- ann,
- Field(default_factory=list, description=field_description), # type: ignore
- )
-
- elif param.kind == param.VAR_KEYWORD:
- # **kwargs handling
- if get_origin(ann) is dict:
- # e.g. def foo(**kwargs: dict[str, int])
- dict_args = get_args(ann)
- if len(dict_args) == 2:
- ann = dict[dict_args[0], dict_args[1]] # type: ignore
- else:
- ann = dict[str, Any]
- else:
- # e.g. def foo(**kwargs: int) -> Dict[str, int]
- ann = dict[str, ann] # type: ignore
-
- fields[name] = (
- ann,
- Field(default_factory=dict, description=field_description), # type: ignore
- )
-
- else:
- # Normal parameter
- if default == inspect._empty:
- # Required field
- fields[name] = (
- ann,
- Field(..., description=field_description),
- )
- else:
- # Parameter with a default value
- fields[name] = (
- ann,
- Field(default=default, description=field_description),
- )
-
- # 3. Dynamically build a Pydantic model
- dynamic_model = create_model(f"{func_name}_args", __base__=BaseModel, **fields)
-
- # 4. Build JSON schema from that model
- json_schema = dynamic_model.model_json_schema()
- if strict_json_schema:
- json_schema = ensure_strict_json_schema(json_schema)
-
- # 5. Return as a FuncSchema dataclass
- return FuncSchema(
- name=func_name,
- description=description_override or (doc_info.description if doc_info else None),
- params_pydantic_model=dynamic_model,
- params_json_schema=json_schema,
- signature=sig,
- takes_context=takes_context,
- )
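For reference, a minimal usage sketch of the `function_schema` helper deleted above. It assumes the symbol is importable as `agents.function_schema.function_schema` and uses a Google-style docstring so the parameter descriptions flow into the generated schema:

from agents.function_schema import function_schema

def add(a: int, b: int = 1) -> int:
    """Adds two integers.

    Args:
        a: The first operand.
        b: An optional second operand.
    """
    return a + b

schema = function_schema(add)
print(schema.name)                # "add"
print(schema.params_json_schema)  # strict JSON schema: "a" required, "b" defaulted

Because `strict_json_schema` defaults to True, the schema is passed through `ensure_strict_json_schema` before being returned.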
diff --git a/tests/src/agents/guardrail.py b/tests/src/agents/guardrail.py
deleted file mode 100644
index fcae0b8a..00000000
--- a/tests/src/agents/guardrail.py
+++ /dev/null
@@ -1,320 +0,0 @@
-from __future__ import annotations
-
-import inspect
-from collections.abc import Awaitable
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Callable, Generic, Union, overload
-
-from typing_extensions import TypeVar
-
-from ._utils import MaybeAwaitable
-from .exceptions import UserError
-from .items import TResponseInputItem
-from .run_context import RunContextWrapper, TContext
-
-if TYPE_CHECKING:
- from .agent import Agent
-
-
-@dataclass
-class GuardrailFunctionOutput:
- """The output of a guardrail function."""
-
- output_info: Any
- """
- Optional information about the guardrail's output. For example, the guardrail could include
- information about the checks it performed and granular results.
- """
-
- tripwire_triggered: bool
- """
- Whether the tripwire was triggered. If triggered, the agent's execution will be halted.
- """
-
-
-@dataclass
-class InputGuardrailResult:
- """The result of a guardrail run."""
-
- guardrail: InputGuardrail[Any]
- """
- The guardrail that was run.
- """
-
- output: GuardrailFunctionOutput
- """The output of the guardrail function."""
-
-
-@dataclass
-class OutputGuardrailResult:
- """The result of a guardrail run."""
-
- guardrail: OutputGuardrail[Any]
- """
- The guardrail that was run.
- """
-
- agent_output: Any
- """
- The output of the agent that was checked by the guardrail.
- """
-
- agent: Agent[Any]
- """
- The agent that was checked by the guardrail.
- """
-
- output: GuardrailFunctionOutput
- """The output of the guardrail function."""
-
-
-@dataclass
-class InputGuardrail(Generic[TContext]):
- """Input guardrails are checks that run in parallel to the agent's execution.
- They can be used to do things like:
- - Check if input messages are off-topic
- - Take over control of the agent's execution if an unexpected input is detected
-
- You can use the `@input_guardrail()` decorator to turn a function into an `InputGuardrail`, or
- create an `InputGuardrail` manually.
-
- Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`, the agent
- execution will immediately stop, and an `InputGuardrailTripwireTriggered` exception will be raised.
- """
-
- guardrail_function: Callable[
- [RunContextWrapper[TContext], Agent[Any], str | list[TResponseInputItem]],
- MaybeAwaitable[GuardrailFunctionOutput],
- ]
- """A function that receives the the agent input and the context, and returns a
- `GuardrailResult`. The result marks whether the tripwire was triggered, and can optionally
- include information about the guardrail's output.
- """
-
- name: str | None = None
- """The name of the guardrail, used for tracing. If not provided, we'll use the guardrail
- function's name.
- """
-
- def get_name(self) -> str:
- if self.name:
- return self.name
-
- return self.guardrail_function.__name__
-
- async def run(
- self,
- agent: Agent[Any],
- input: str | list[TResponseInputItem],
- context: RunContextWrapper[TContext],
- ) -> InputGuardrailResult:
- if not callable(self.guardrail_function):
- raise UserError(f"Guardrail function must be callable, got {self.guardrail_function}")
-
- output = self.guardrail_function(context, agent, input)
- if inspect.isawaitable(output):
- return InputGuardrailResult(
- guardrail=self,
- output=await output,
- )
-
- return InputGuardrailResult(
- guardrail=self,
- output=output,
- )
-
-
-@dataclass
-class OutputGuardrail(Generic[TContext]):
- """Output guardrails are checks that run on the final output of an agent.
- They can be used to check whether the output passes certain validation criteria.
-
- You can use the `@output_guardrail()` decorator to turn a function into an `OutputGuardrail`,
- or create an `OutputGuardrail` manually.
-
- Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`, an
- `OutputGuardrailTripwireTriggered` exception will be raised.
- """
-
- guardrail_function: Callable[
- [RunContextWrapper[TContext], Agent[Any], Any],
- MaybeAwaitable[GuardrailFunctionOutput],
- ]
- """A function that receives the final agent, its output, and the context, and returns a
- `GuardrailResult`. The result marks whether the tripwire was triggered, and can optionally
- include information about the guardrail's output.
- """
-
- name: str | None = None
- """The name of the guardrail, used for tracing. If not provided, we'll use the guardrail
- function's name.
- """
-
- def get_name(self) -> str:
- if self.name:
- return self.name
-
- return self.guardrail_function.__name__
-
- async def run(
- self, context: RunContextWrapper[TContext], agent: Agent[Any], agent_output: Any
- ) -> OutputGuardrailResult:
- if not callable(self.guardrail_function):
- raise UserError(f"Guardrail function must be callable, got {self.guardrail_function}")
-
- output = self.guardrail_function(context, agent, agent_output)
- if inspect.isawaitable(output):
- return OutputGuardrailResult(
- guardrail=self,
- agent=agent,
- agent_output=agent_output,
- output=await output,
- )
-
- return OutputGuardrailResult(
- guardrail=self,
- agent=agent,
- agent_output=agent_output,
- output=output,
- )
-
-
-TContext_co = TypeVar("TContext_co", bound=Any, covariant=True)
-
-# For InputGuardrail
-_InputGuardrailFuncSync = Callable[
- [RunContextWrapper[TContext_co], "Agent[Any]", Union[str, list[TResponseInputItem]]],
- GuardrailFunctionOutput,
-]
-_InputGuardrailFuncAsync = Callable[
- [RunContextWrapper[TContext_co], "Agent[Any]", Union[str, list[TResponseInputItem]]],
- Awaitable[GuardrailFunctionOutput],
-]
-
-
-@overload
-def input_guardrail(
- func: _InputGuardrailFuncSync[TContext_co],
-) -> InputGuardrail[TContext_co]: ...
-
-
-@overload
-def input_guardrail(
- func: _InputGuardrailFuncAsync[TContext_co],
-) -> InputGuardrail[TContext_co]: ...
-
-
-@overload
-def input_guardrail(
- *,
- name: str | None = None,
-) -> Callable[
- [_InputGuardrailFuncSync[TContext_co] | _InputGuardrailFuncAsync[TContext_co]],
- InputGuardrail[TContext_co],
-]: ...
-
-
-def input_guardrail(
- func: _InputGuardrailFuncSync[TContext_co]
- | _InputGuardrailFuncAsync[TContext_co]
- | None = None,
- *,
- name: str | None = None,
-) -> (
- InputGuardrail[TContext_co]
- | Callable[
- [_InputGuardrailFuncSync[TContext_co] | _InputGuardrailFuncAsync[TContext_co]],
- InputGuardrail[TContext_co],
- ]
-):
- """
- Decorator that transforms a sync or async function into an `InputGuardrail`.
- It can be used directly (no parentheses) or with keyword args, e.g.:
-
- @input_guardrail
- def my_sync_guardrail(...): ...
-
- @input_guardrail(name="guardrail_name")
- async def my_async_guardrail(...): ...
- """
-
- def decorator(
- f: _InputGuardrailFuncSync[TContext_co] | _InputGuardrailFuncAsync[TContext_co],
- ) -> InputGuardrail[TContext_co]:
- return InputGuardrail(guardrail_function=f, name=name)
-
- if func is not None:
- # Decorator was used without parentheses
- return decorator(func)
-
- # Decorator used with keyword arguments
- return decorator
-
-
-_OutputGuardrailFuncSync = Callable[
- [RunContextWrapper[TContext_co], "Agent[Any]", Any],
- GuardrailFunctionOutput,
-]
-_OutputGuardrailFuncAsync = Callable[
- [RunContextWrapper[TContext_co], "Agent[Any]", Any],
- Awaitable[GuardrailFunctionOutput],
-]
-
-
-@overload
-def output_guardrail(
- func: _OutputGuardrailFuncSync[TContext_co],
-) -> OutputGuardrail[TContext_co]: ...
-
-
-@overload
-def output_guardrail(
- func: _OutputGuardrailFuncAsync[TContext_co],
-) -> OutputGuardrail[TContext_co]: ...
-
-
-@overload
-def output_guardrail(
- *,
- name: str | None = None,
-) -> Callable[
- [_OutputGuardrailFuncSync[TContext_co] | _OutputGuardrailFuncAsync[TContext_co]],
- OutputGuardrail[TContext_co],
-]: ...
-
-
-def output_guardrail(
- func: _OutputGuardrailFuncSync[TContext_co]
- | _OutputGuardrailFuncAsync[TContext_co]
- | None = None,
- *,
- name: str | None = None,
-) -> (
- OutputGuardrail[TContext_co]
- | Callable[
- [_OutputGuardrailFuncSync[TContext_co] | _OutputGuardrailFuncAsync[TContext_co]],
- OutputGuardrail[TContext_co],
- ]
-):
- """
- Decorator that transforms a sync or async function into an `OutputGuardrail`.
- It can be used directly (no parentheses) or with keyword args, e.g.:
-
- @output_guardrail
- def my_sync_guardrail(...): ...
-
- @output_guardrail(name="guardrail_name")
- async def my_async_guardrail(...): ...
- """
-
- def decorator(
- f: _OutputGuardrailFuncSync[TContext_co] | _OutputGuardrailFuncAsync[TContext_co],
- ) -> OutputGuardrail[TContext_co]:
- return OutputGuardrail(guardrail_function=f, name=name)
-
- if func is not None:
- # Decorator was used without parentheses
- return decorator(func)
-
- # Decorator used with keyword arguments
- return decorator
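As a usage sketch for the decorators above (assuming the symbols are importable from `agents.guardrail`; the `context` and `agent` parameters are typed loosely here for brevity):

from typing import Any

from agents.guardrail import GuardrailFunctionOutput, input_guardrail

@input_guardrail(name="no_profanity")
def no_profanity(context: Any, agent: Any, user_input: Any) -> GuardrailFunctionOutput:
    # Trip the tripwire when a banned word appears in plain-string input.
    flagged = isinstance(user_input, str) and "badword" in user_input.lower()
    return GuardrailFunctionOutput(output_info={"flagged": flagged}, tripwire_triggered=flagged)

The same function defined with `async def` would work as well, since `InputGuardrail.run` awaits the result whenever it is awaitable.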
diff --git a/tests/src/agents/handoffs.py b/tests/src/agents/handoffs.py
deleted file mode 100644
index ac157401..00000000
--- a/tests/src/agents/handoffs.py
+++ /dev/null
@@ -1,236 +0,0 @@
-from __future__ import annotations
-
-import inspect
-from collections.abc import Awaitable
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Callable, Generic, cast, overload
-
-from pydantic import TypeAdapter
-from typing_extensions import TypeAlias, TypeVar
-
-from . import _utils
-from .exceptions import ModelBehaviorError, UserError
-from .items import RunItem, TResponseInputItem
-from .run_context import RunContextWrapper, TContext
-from .strict_schema import ensure_strict_json_schema
-from .tracing.spans import SpanError
-
-if TYPE_CHECKING:
- from .agent import Agent
-
-
-# The handoff input type is the type of data passed when the agent is called via a handoff.
-THandoffInput = TypeVar("THandoffInput", default=Any)
-
-OnHandoffWithInput = Callable[[RunContextWrapper[Any], THandoffInput], Any]
-OnHandoffWithoutInput = Callable[[RunContextWrapper[Any]], Any]
-
-
-@dataclass(frozen=True)
-class HandoffInputData:
- input_history: str | tuple[TResponseInputItem, ...]
- """
- The input history before `Runner.run()` was called.
- """
-
- pre_handoff_items: tuple[RunItem, ...]
- """
- The items generated before the agent turn where the handoff was invoked.
- """
-
- new_items: tuple[RunItem, ...]
- """
- The new items generated during the current agent turn, including the item that triggered the
- handoff and the tool output message representing the handoff's response.
- """
-
-
-HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], HandoffInputData]
-"""A function that filters the input data passed to the next agent."""
-
-
-@dataclass
-class Handoff(Generic[TContext]):
- """A handoff is when an agent delegates a task to another agent.
- For example, in a customer support scenario you might have a "triage agent" that determines
- which agent should handle the user's request, and sub-agents that specialize in different
- areas like billing, account management, etc.
- """
-
- tool_name: str
- """The name of the tool that represents the handoff."""
-
- tool_description: str
- """The description of the tool that represents the handoff."""
-
- input_json_schema: dict[str, Any]
- """The JSON schema for the handoff input. Can be empty if the handoff does not take an input.
- """
-
- on_invoke_handoff: Callable[[RunContextWrapper[Any], str], Awaitable[Agent[TContext]]]
- """The function that invokes the handoff. The parameters passed are:
- 1. The handoff run context
- 2. The arguments from the LLM, as a JSON string. Empty string if input_json_schema is empty.
-
- Must return an agent.
- """
-
- agent_name: str
- """The name of the agent that is being handed off to."""
-
- input_filter: HandoffInputFilter | None = None
- """A function that filters the inputs that are passed to the next agent. By default, the new
- agent sees the entire conversation history. In some cases, you may want to filter inputs e.g.
- to remove older inputs, or remove tools from existing inputs.
-
- The function will receive the entire conversation history so far, including the input item
- that triggered the handoff and a tool call output item representing the handoff tool's output.
-
- You are free to modify the input history or new items as you see fit. The next agent that
- runs will receive the filtered `HandoffInputData`.
-
- IMPORTANT: in streaming mode, we will not stream anything as a result of this function. The
- items generated before will already have been streamed.
- """
-
- strict_json_schema: bool = True
- """Whether the input JSON schema is in strict mode. We **strongly** recommend setting this to
- True, as it increases the likelihood of correct JSON input.
- """
-
- def get_transfer_message(self, agent: Agent[Any]) -> str:
- base = f"{{'assistant': '{agent.name}'}}"
- return base
-
- @classmethod
- def default_tool_name(cls, agent: Agent[Any]) -> str:
- return _utils.transform_string_function_style(f"transfer_to_{agent.name}")
-
- @classmethod
- def default_tool_description(cls, agent: Agent[Any]) -> str:
- return (
- f"Handoff to the {agent.name} agent to handle the request. "
- f"{agent.handoff_description or ''}"
- )
-
-
-@overload
-def handoff(
- agent: Agent[TContext],
- *,
- tool_name_override: str | None = None,
- tool_description_override: str | None = None,
- input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
-) -> Handoff[TContext]: ...
-
-
-@overload
-def handoff(
- agent: Agent[TContext],
- *,
- on_handoff: OnHandoffWithInput[THandoffInput],
- input_type: type[THandoffInput],
- tool_description_override: str | None = None,
- tool_name_override: str | None = None,
- input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
-) -> Handoff[TContext]: ...
-
-
-@overload
-def handoff(
- agent: Agent[TContext],
- *,
- on_handoff: OnHandoffWithoutInput,
- tool_description_override: str | None = None,
- tool_name_override: str | None = None,
- input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
-) -> Handoff[TContext]: ...
-
-
-def handoff(
- agent: Agent[TContext],
- tool_name_override: str | None = None,
- tool_description_override: str | None = None,
- on_handoff: OnHandoffWithInput[THandoffInput] | OnHandoffWithoutInput | None = None,
- input_type: type[THandoffInput] | None = None,
- input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
-) -> Handoff[TContext]:
- """Create a handoff from an agent.
-
- Args:
- agent: The agent to hand off to, or a function that returns an agent.
- tool_name_override: Optional override for the name of the tool that represents the handoff.
- tool_description_override: Optional override for the description of the tool that
- represents the handoff.
- on_handoff: A function that runs when the handoff is invoked.
- input_type: The type of the input to the handoff. If provided, the input will be validated
- against this type. Only relevant if you pass a function that takes an input.
- input_filter: A function that filters the inputs that are passed to the next agent.
- """
- assert input_type is None or on_handoff is not None, (
- "You must provide on_handoff when you provide input_type"
- )
- type_adapter: TypeAdapter[Any] | None
- if input_type is not None:
- assert callable(on_handoff), "on_handoff must be callable"
- sig = inspect.signature(on_handoff)
- if len(sig.parameters) != 2:
- raise UserError("on_handoff must take two arguments: context and input")
-
- type_adapter = TypeAdapter(input_type)
- input_json_schema = type_adapter.json_schema()
- else:
- type_adapter = None
- input_json_schema = {}
- if on_handoff is not None:
- sig = inspect.signature(on_handoff)
- if len(sig.parameters) != 1:
- raise UserError("on_handoff must take one argument: context")
-
- async def _invoke_handoff(
- ctx: RunContextWrapper[Any], input_json: str | None = None
- ) -> Agent[Any]:
- if input_type is not None and type_adapter is not None:
- if input_json is None:
- _utils.attach_error_to_current_span(
- SpanError(
- message="Handoff function expected non-null input, but got None",
- data={"details": "input_json is None"},
- )
- )
- raise ModelBehaviorError("Handoff function expected non-null input, but got None")
-
- validated_input = _utils.validate_json(
- json_str=input_json,
- type_adapter=type_adapter,
- partial=False,
- )
- input_func = cast(OnHandoffWithInput[THandoffInput], on_handoff)
- if inspect.iscoroutinefunction(input_func):
- await input_func(ctx, validated_input)
- else:
- input_func(ctx, validated_input)
- elif on_handoff is not None:
- no_input_func = cast(OnHandoffWithoutInput, on_handoff)
- if inspect.iscoroutinefunction(no_input_func):
- await no_input_func(ctx)
- else:
- no_input_func(ctx)
-
- return agent
-
- tool_name = tool_name_override or Handoff.default_tool_name(agent)
- tool_description = tool_description_override or Handoff.default_tool_description(agent)
-
- # Always ensure the input JSON schema is in strict mode
- # If there is a need, we can make this configurable in the future
- input_json_schema = ensure_strict_json_schema(input_json_schema)
-
- return Handoff(
- tool_name=tool_name,
- tool_description=tool_description,
- input_json_schema=input_json_schema,
- on_invoke_handoff=_invoke_handoff,
- input_filter=input_filter,
- agent_name=agent.name,
- )
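A usage sketch for `handoff()` with a typed input, assuming `Agent` can be constructed with just a `name` and that the symbols import from the `agents` package:

from pydantic import BaseModel

from agents import Agent
from agents.handoffs import handoff
from agents.run_context import RunContextWrapper

class EscalationData(BaseModel):
    reason: str

def on_escalate(ctx: RunContextWrapper, data: EscalationData) -> None:
    # Runs when the LLM invokes the generated transfer tool.
    print(f"Escalating: {data.reason}")

billing_agent = Agent(name="billing_agent")
escalation = handoff(billing_agent, on_handoff=on_escalate, input_type=EscalationData)
print(escalation.tool_name)  # "transfer_to_billing_agent" by default

The LLM's JSON arguments are validated against `EscalationData` via the `TypeAdapter` before `on_escalate` is called.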
diff --git a/tests/src/agents/items.py b/tests/src/agents/items.py
deleted file mode 100644
index bbaf49d8..00000000
--- a/tests/src/agents/items.py
+++ /dev/null
@@ -1,246 +0,0 @@
-from __future__ import annotations
-
-import abc
-import copy
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union
-
-from openai.types.responses import (
- Response,
- ResponseComputerToolCall,
- ResponseFileSearchToolCall,
- ResponseFunctionToolCall,
- ResponseFunctionWebSearch,
- ResponseInputItemParam,
- ResponseOutputItem,
- ResponseOutputMessage,
- ResponseOutputRefusal,
- ResponseOutputText,
- ResponseStreamEvent,
-)
-from openai.types.responses.response_input_item_param import ComputerCallOutput, FunctionCallOutput
-from openai.types.responses.response_output_item import Reasoning
-from pydantic import BaseModel
-from typing_extensions import TypeAlias
-
-from .exceptions import AgentsException, ModelBehaviorError
-from .usage import Usage
-
-if TYPE_CHECKING:
- from .agent import Agent
-
-TResponse = Response
-"""A type alias for the Response type from the OpenAI SDK."""
-
-TResponseInputItem = ResponseInputItemParam
-"""A type alias for the ResponseInputItemParam type from the OpenAI SDK."""
-
-TResponseOutputItem = ResponseOutputItem
-"""A type alias for the ResponseOutputItem type from the OpenAI SDK."""
-
-TResponseStreamEvent = ResponseStreamEvent
-"""A type alias for the ResponseStreamEvent type from the OpenAI SDK."""
-
-T = TypeVar("T", bound=Union[TResponseOutputItem, TResponseInputItem])
-
-
-@dataclass
-class RunItemBase(Generic[T], abc.ABC):
- agent: Agent[Any]
- """The agent whose run caused this item to be generated."""
-
- raw_item: T
- """The raw Responses item from the run. This will always be a either an output item (i.e.
- `openai.types.responses.ResponseOutputItem` or an input item
- (i.e. `openai.types.responses.ResponseInputItemParam`).
- """
-
- def to_input_item(self) -> TResponseInputItem:
- """Converts this item into an input item suitable for passing to the model."""
- if isinstance(self.raw_item, dict):
- # We know that input items are dicts, so we can ignore the type error
- return self.raw_item # type: ignore
- elif isinstance(self.raw_item, BaseModel):
- # All output items are Pydantic models that can be converted to input items.
- return self.raw_item.model_dump(exclude_unset=True) # type: ignore
- else:
- raise AgentsException(f"Unexpected raw item type: {type(self.raw_item)}")
-
-
-@dataclass
-class MessageOutputItem(RunItemBase[ResponseOutputMessage]):
- """Represents a message from the LLM."""
-
- raw_item: ResponseOutputMessage
- """The raw response output message."""
-
- type: Literal["message_output_item"] = "message_output_item"
-
-
-@dataclass
-class HandoffCallItem(RunItemBase[ResponseFunctionToolCall]):
- """Represents a tool call for a handoff from one agent to another."""
-
- raw_item: ResponseFunctionToolCall
- """The raw response function tool call that represents the handoff."""
-
- type: Literal["handoff_call_item"] = "handoff_call_item"
-
-
-@dataclass
-class HandoffOutputItem(RunItemBase[TResponseInputItem]):
- """Represents the output of a handoff."""
-
- raw_item: TResponseInputItem
- """The raw input item that represents the handoff taking place."""
-
- source_agent: Agent[Any]
- """The agent that made the handoff."""
-
- target_agent: Agent[Any]
- """The agent that is being handed off to."""
-
- type: Literal["handoff_output_item"] = "handoff_output_item"
-
-
-ToolCallItemTypes: TypeAlias = Union[
- ResponseFunctionToolCall,
- ResponseComputerToolCall,
- ResponseFileSearchToolCall,
- ResponseFunctionWebSearch,
-]
-"""A type that represents a tool call item."""
-
-
-@dataclass
-class ToolCallItem(RunItemBase[ToolCallItemTypes]):
- """Represents a tool call e.g. a function call or computer action call."""
-
- raw_item: ToolCallItemTypes
- """The raw tool call item."""
-
- type: Literal["tool_call_item"] = "tool_call_item"
-
-
-@dataclass
-class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutput]]):
- """Represents the output of a tool call."""
-
- raw_item: FunctionCallOutput | ComputerCallOutput
- """The raw item from the model."""
-
- output: str
- """The output of the tool call."""
-
- type: Literal["tool_call_output_item"] = "tool_call_output_item"
-
-
-@dataclass
-class ReasoningItem(RunItemBase[Reasoning]):
- """Represents a reasoning item."""
-
- raw_item: Reasoning
- """The raw reasoning item."""
-
- type: Literal["reasoning_item"] = "reasoning_item"
-
-
-RunItem: TypeAlias = Union[
- MessageOutputItem,
- HandoffCallItem,
- HandoffOutputItem,
- ToolCallItem,
- ToolCallOutputItem,
- ReasoningItem,
-]
-"""An item generated by an agent."""
-
-
-@dataclass
-class ModelResponse:
- output: list[TResponseOutputItem]
- """A list of outputs (messages, tool calls, etc) generated by the model"""
-
- usage: Usage
- """The usage information for the response."""
-
- referenceable_id: str | None
- """An ID for the response which can be used to refer to the response in subsequent calls to the
- model. Not supported by all model providers.
- """
-
- def to_input_items(self) -> list[TResponseInputItem]:
- """Convert the output into a list of input items suitable for passing to the model."""
- # We happen to know that the shape of the Pydantic output items is the same as the
- # equivalent TypedDict input items, so we can just convert each one.
- # This is also tested via unit tests.
- return [it.model_dump(exclude_unset=True) for it in self.output] # type: ignore
-
-
-class ItemHelpers:
- @classmethod
- def extract_last_content(cls, message: TResponseOutputItem) -> str:
- """Extracts the last text content or refusal from a message."""
- if not isinstance(message, ResponseOutputMessage):
- return ""
-
- last_content = message.content[-1]
- if isinstance(last_content, ResponseOutputText):
- return last_content.text
- elif isinstance(last_content, ResponseOutputRefusal):
- return last_content.refusal
- else:
- raise ModelBehaviorError(f"Unexpected content type: {type(last_content)}")
-
- @classmethod
- def extract_last_text(cls, message: TResponseOutputItem) -> str | None:
- """Extracts the last text content from a message, if any. Ignores refusals."""
- if isinstance(message, ResponseOutputMessage):
- last_content = message.content[-1]
- if isinstance(last_content, ResponseOutputText):
- return last_content.text
-
- return None
-
- @classmethod
- def input_to_new_input_list(
- cls, input: str | list[TResponseInputItem]
- ) -> list[TResponseInputItem]:
- """Converts a string or list of input items into a list of input items."""
- if isinstance(input, str):
- return [
- {
- "content": input,
- "role": "user",
- }
- ]
- return copy.deepcopy(input)
-
- @classmethod
- def text_message_outputs(cls, items: list[RunItem]) -> str:
- """Concatenates all the text content from a list of message output items."""
- text = ""
- for item in items:
- if isinstance(item, MessageOutputItem):
- text += cls.text_message_output(item)
- return text
-
- @classmethod
- def text_message_output(cls, message: MessageOutputItem) -> str:
- """Extracts all the text content from a single message output item."""
- text = ""
- for item in message.raw_item.content:
- if isinstance(item, ResponseOutputText):
- text += item.text
- return text
-
- @classmethod
- def tool_call_output_item(
- cls, tool_call: ResponseFunctionToolCall, output: str
- ) -> FunctionCallOutput:
- """Creates a tool call output item from a tool call and its output."""
- return {
- "call_id": tool_call.call_id,
- "output": output,
- "type": "function_call_output",
- }
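The helpers above can be exercised directly; a small sketch, assuming the module imports as `agents.items`:

from agents.items import ItemHelpers

# A plain string becomes a single user message; an existing list is deep-copied.
items = ItemHelpers.input_to_new_input_list("What's the weather in Tokyo?")
assert items == [{"content": "What's the weather in Tokyo?", "role": "user"}]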
diff --git a/tests/src/agents/lifecycle.py b/tests/src/agents/lifecycle.py
deleted file mode 100644
index 8643248b..00000000
--- a/tests/src/agents/lifecycle.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from typing import Any, Generic
-
-from .agent import Agent
-from .run_context import RunContextWrapper, TContext
-from .tool import Tool
-
-
-class RunHooks(Generic[TContext]):
- """A class that receives callbacks on various lifecycle events in an agent run. Subclass and
- override the methods you need.
- """
-
- async def on_agent_start(
- self, context: RunContextWrapper[TContext], agent: Agent[TContext]
- ) -> None:
- """Called before the agent is invoked. Called each time the current agent changes."""
- pass
-
- async def on_agent_end(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- output: Any,
- ) -> None:
- """Called when the agent produces a final output."""
- pass
-
- async def on_handoff(
- self,
- context: RunContextWrapper[TContext],
- from_agent: Agent[TContext],
- to_agent: Agent[TContext],
- ) -> None:
- """Called when a handoff occurs."""
- pass
-
- async def on_tool_start(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- tool: Tool,
- ) -> None:
- """Called before a tool is invoked."""
- pass
-
- async def on_tool_end(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- tool: Tool,
- result: str,
- ) -> None:
- """Called after a tool is invoked."""
- pass
-
-
-class AgentHooks(Generic[TContext]):
- """A class that receives callbacks on various lifecycle events for a specific agent. You can
- set this on `agent.hooks` to receive events for that specific agent.
-
- Subclass and override the methods you need.
- """
-
- async def on_start(self, context: RunContextWrapper[TContext], agent: Agent[TContext]) -> None:
- """Called before the agent is invoked. Called each time the running agent is changed to this
- agent."""
- pass
-
- async def on_end(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- output: Any,
- ) -> None:
- """Called when the agent produces a final output."""
- pass
-
- async def on_handoff(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- source: Agent[TContext],
- ) -> None:
- """Called when the agent is being handed off to. The `source` is the agent that is handing
- off to this agent."""
- pass
-
- async def on_tool_start(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- tool: Tool,
- ) -> None:
- """Called before a tool is invoked."""
- pass
-
- async def on_tool_end(
- self,
- context: RunContextWrapper[TContext],
- agent: Agent[TContext],
- tool: Tool,
- result: str,
- ) -> None:
- """Called after a tool is invoked."""
- pass
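A sketch of subclassing the hooks above to log lifecycle events, assuming `RunHooks` imports from `agents.lifecycle` and that `Agent` and `Tool` expose a `name` attribute:

from typing import Any

from agents.lifecycle import RunHooks

class LoggingRunHooks(RunHooks[Any]):
    async def on_agent_start(self, context: Any, agent: Any) -> None:
        print(f"agent starting: {agent.name}")

    async def on_tool_end(self, context: Any, agent: Any, tool: Any, result: str) -> None:
        print(f"tool {tool.name} returned {result!r}")

Only the overridden methods do anything; the base-class defaults are all no-ops.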
diff --git a/tests/src/agents/logger.py b/tests/src/agents/logger.py
deleted file mode 100644
index bd81a827..00000000
--- a/tests/src/agents/logger.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import logging
-
-logger = logging.getLogger("openai.agents")
diff --git a/tests/src/agents/model_settings.py b/tests/src/agents/model_settings.py
deleted file mode 100644
index 78cf9a83..00000000
--- a/tests/src/agents/model_settings.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Literal
-
-
-@dataclass
-class ModelSettings:
- """Settings to use when calling an LLM.
-
- This class holds optional model configuration parameters (e.g. temperature,
- top_p, penalties, truncation, etc.).
- """
- temperature: float | None = None
- top_p: float | None = None
- frequency_penalty: float | None = None
- presence_penalty: float | None = None
- tool_choice: Literal["auto", "required", "none"] | str | None = None
- parallel_tool_calls: bool | None = False
- truncation: Literal["auto", "disabled"] | None = None
-
- def resolve(self, override: ModelSettings | None) -> ModelSettings:
- """Produce a new ModelSettings by overlaying any non-None values from the
- override on top of this instance."""
- if override is None:
- return self
- return ModelSettings(
- temperature=override.temperature if override.temperature is not None else self.temperature,
- top_p=override.top_p if override.top_p is not None else self.top_p,
- frequency_penalty=override.frequency_penalty if override.frequency_penalty is not None else self.frequency_penalty,
- presence_penalty=override.presence_penalty if override.presence_penalty is not None else self.presence_penalty,
- tool_choice=override.tool_choice if override.tool_choice is not None else self.tool_choice,
- parallel_tool_calls=override.parallel_tool_calls if override.parallel_tool_calls is not None else self.parallel_tool_calls,
- truncation=override.truncation if override.truncation is not None else self.truncation,
- )
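With the None-aware overlay above, `resolve` keeps falsy-but-explicit overrides such as `temperature=0.0`, which a plain `or` would silently discard. A quick sketch, assuming the module imports as `agents.model_settings`:

from agents.model_settings import ModelSettings

base = ModelSettings(temperature=0.7, top_p=0.9)
override = ModelSettings(temperature=0.0)  # explicitly set, should win over 0.7

resolved = base.resolve(override)
print(resolved.temperature)  # 0.0
print(resolved.top_p)        # 0.9 (no override given)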
diff --git a/tests/src/agents/models/__init__.py b/tests/src/agents/models/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/src/agents/models/_openai_shared.py b/tests/src/agents/models/_openai_shared.py
deleted file mode 100644
index 2e145018..00000000
--- a/tests/src/agents/models/_openai_shared.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from __future__ import annotations
-
-from openai import AsyncOpenAI
-
-_default_openai_key: str | None = None
-_default_openai_client: AsyncOpenAI | None = None
-_use_responses_by_default: bool = True
-
-
-def set_default_openai_key(key: str) -> None:
- global _default_openai_key
- _default_openai_key = key
-
-
-def get_default_openai_key() -> str | None:
- return _default_openai_key
-
-
-def set_default_openai_client(client: AsyncOpenAI) -> None:
- global _default_openai_client
- _default_openai_client = client
-
-
-def get_default_openai_client() -> AsyncOpenAI | None:
- return _default_openai_client
-
-
-def set_use_responses_by_default(use_responses: bool) -> None:
- global _use_responses_by_default
- _use_responses_by_default = use_responses
-
-
-def get_use_responses_by_default() -> bool:
- return _use_responses_by_default
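A brief sketch of how these module-level defaults were meant to be set and read (the import path assumed here mirrors the deleted file's location):

from openai import AsyncOpenAI

from agents.models import _openai_shared

_openai_shared.set_default_openai_client(AsyncOpenAI(api_key="sk-example"))
client = _openai_shared.get_default_openai_client()
assert client is not None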
diff --git a/tests/src/agents/models/fake_id.py b/tests/src/agents/models/fake_id.py
deleted file mode 100644
index 0565b0a7..00000000
--- a/tests/src/agents/models/fake_id.py
+++ /dev/null
@@ -1,5 +0,0 @@
-FAKE_RESPONSES_ID = "__fake_id__"
-"""This is a placeholder ID used to fill in the `id` field in Responses API related objects. It's
-useful when you're creating Responses objects from non-Responses APIs, e.g. the OpenAI Chat
-Completions API or other LLM providers.
-"""
diff --git a/tests/src/agents/models/interface.py b/tests/src/agents/models/interface.py
deleted file mode 100644
index e9a8700c..00000000
--- a/tests/src/agents/models/interface.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from __future__ import annotations
-
-import abc
-import enum
-from collections.abc import AsyncIterator
-from typing import TYPE_CHECKING
-
-from ..agent_output import AgentOutputSchema
-from ..handoffs import Handoff
-from ..items import ModelResponse, TResponseInputItem, TResponseStreamEvent
-from ..tool import Tool
-
-if TYPE_CHECKING:
- from ..model_settings import ModelSettings
-
-
-class ModelTracing(enum.Enum):
- DISABLED = 0
- """Tracing is disabled entirely."""
-
- ENABLED = 1
- """Tracing is enabled, and all data is included."""
-
- ENABLED_WITHOUT_DATA = 2
- """Tracing is enabled, but inputs/outputs are not included."""
-
- def is_disabled(self) -> bool:
- return self == ModelTracing.DISABLED
-
- def include_data(self) -> bool:
- return self == ModelTracing.ENABLED
-
-
-class Model(abc.ABC):
- """The base interface for calling an LLM."""
-
- @abc.abstractmethod
- async def get_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> ModelResponse:
- """Get a response from the model.
-
- Args:
- system_instructions: The system instructions to use.
- input: The input items to the model, in OpenAI Responses format.
- model_settings: The model settings to use.
- tools: The tools available to the model.
- output_schema: The output schema to use.
- handoffs: The handoffs available to the model.
- tracing: Tracing configuration.
-
- Returns:
- The full model response.
- """
- pass
-
- @abc.abstractmethod
- def stream_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> AsyncIterator[TResponseStreamEvent]:
- """Stream a response from the model.
-
- Args:
- system_instructions: The system instructions to use.
- input: The input items to the model, in OpenAI Responses format.
- model_settings: The model settings to use.
- tools: The tools available to the model.
- output_schema: The output schema to use.
- handoffs: The handoffs available to the model.
- tracing: Tracing configuration.
-
- Returns:
- An iterator of response stream events, in OpenAI Responses format.
- """
- pass
-
-
-class ModelProvider(abc.ABC):
- """The base interface for a model provider.
-
- A model provider is responsible for looking up Models by name.
- """
-
- @abc.abstractmethod
- def get_model(self, model_name: str | None) -> Model:
- """Get a model by name.
-
- Args:
- model_name: The name of the model to get.
-
- Returns:
- The model.
- """
diff --git a/tests/src/agents/models/openai_chatcompletions.py b/tests/src/agents/models/openai_chatcompletions.py
deleted file mode 100644
index a7340d05..00000000
--- a/tests/src/agents/models/openai_chatcompletions.py
+++ /dev/null
@@ -1,952 +0,0 @@
-from __future__ import annotations
-
-import dataclasses
-import json
-import time
-from collections.abc import AsyncIterator, Iterable
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Literal, cast, overload
-
-from openai import NOT_GIVEN, AsyncOpenAI, AsyncStream, NotGiven
-from openai.types import ChatModel
-from openai.types.chat import (
- ChatCompletion,
- ChatCompletionAssistantMessageParam,
- ChatCompletionChunk,
- ChatCompletionContentPartImageParam,
- ChatCompletionContentPartParam,
- ChatCompletionContentPartTextParam,
- ChatCompletionDeveloperMessageParam,
- ChatCompletionMessage,
- ChatCompletionMessageParam,
- ChatCompletionMessageToolCallParam,
- ChatCompletionSystemMessageParam,
- ChatCompletionToolChoiceOptionParam,
- ChatCompletionToolMessageParam,
- ChatCompletionUserMessageParam,
-)
-from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
-from openai.types.chat.completion_create_params import ResponseFormat
-from openai.types.completion_usage import CompletionUsage
-from openai.types.responses import (
- EasyInputMessageParam,
- Response,
- ResponseCompletedEvent,
- ResponseContentPartAddedEvent,
- ResponseContentPartDoneEvent,
- ResponseCreatedEvent,
- ResponseFileSearchToolCallParam,
- ResponseFunctionCallArgumentsDeltaEvent,
- ResponseFunctionToolCall,
- ResponseFunctionToolCallParam,
- ResponseInputContentParam,
- ResponseInputImageParam,
- ResponseInputTextParam,
- ResponseOutputItem,
- ResponseOutputItemAddedEvent,
- ResponseOutputItemDoneEvent,
- ResponseOutputMessage,
- ResponseOutputMessageParam,
- ResponseOutputRefusal,
- ResponseOutputText,
- ResponseRefusalDeltaEvent,
- ResponseTextDeltaEvent,
-)
-from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
-
-from .. import _debug
-from ..agent_output import AgentOutputSchema
-from ..exceptions import AgentsException, UserError
-from ..handoffs import Handoff
-from ..items import ModelResponse, TResponseInputItem, TResponseOutputItem, TResponseStreamEvent
-from ..logger import logger
-from ..tool import FunctionTool, Tool
-from ..tracing import generation_span
-from ..tracing.span_data import GenerationSpanData
-from ..tracing.spans import Span
-from ..usage import Usage
-from ..version import __version__
-from .fake_id import FAKE_RESPONSES_ID
-from .interface import Model, ModelTracing
-
-if TYPE_CHECKING:
- from ..model_settings import ModelSettings
-
-
-_USER_AGENT = f"Agents/Python {__version__}"
-_HEADERS = {"User-Agent": _USER_AGENT}
-
-
-@dataclass
-class _StreamingState:
- started: bool = False
- text_content_index_and_output: tuple[int, ResponseOutputText] | None = None
- refusal_content_index_and_output: tuple[int, ResponseOutputRefusal] | None = None
- function_calls: dict[int, ResponseFunctionToolCall] = field(default_factory=dict)
-
-
-class OpenAIChatCompletionsModel(Model):
- def __init__(
- self,
- model: str | ChatModel,
- openai_client: AsyncOpenAI,
- ) -> None:
- self.model = model
- self._client = openai_client
-
- def _non_null_or_not_given(self, value: Any) -> Any:
- return value if value is not None else NOT_GIVEN
-
- async def get_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> ModelResponse:
- with generation_span(
- model=str(self.model),
- model_config=dataclasses.asdict(model_settings)
- | {"base_url": str(self._client.base_url)},
- disabled=tracing.is_disabled(),
- ) as span_generation:
- response = await self._fetch_response(
- system_instructions,
- input,
- model_settings,
- tools,
- output_schema,
- handoffs,
- span_generation,
- tracing,
- stream=False,
- )
-
- if _debug.DONT_LOG_MODEL_DATA:
- logger.debug("Received model response")
- else:
- logger.debug(
- f"LLM resp:\n{json.dumps(response.choices[0].message.model_dump(), indent=2)}\n"
- )
-
- usage = (
- Usage(
- requests=1,
- input_tokens=response.usage.prompt_tokens,
- output_tokens=response.usage.completion_tokens,
- total_tokens=response.usage.total_tokens,
- )
- if response.usage
- else Usage()
- )
- if tracing.include_data():
- span_generation.span_data.output = [response.choices[0].message.model_dump()]
- span_generation.span_data.usage = {
- "input_tokens": usage.input_tokens,
- "output_tokens": usage.output_tokens,
- }
-
- items = _Converter.message_to_output_items(response.choices[0].message)
-
- return ModelResponse(
- output=items,
- usage=usage,
- referenceable_id=None,
- )
-
- async def stream_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> AsyncIterator[TResponseStreamEvent]:
- """
- Yields a partial message as it is generated, as well as the usage information.
- """
- with generation_span(
- model=str(self.model),
- model_config=dataclasses.asdict(model_settings)
- | {"base_url": str(self._client.base_url)},
- disabled=tracing.is_disabled(),
- ) as span_generation:
- response, stream = await self._fetch_response(
- system_instructions,
- input,
- model_settings,
- tools,
- output_schema,
- handoffs,
- span_generation,
- tracing,
- stream=True,
- )
-
- usage: CompletionUsage | None = None
- state = _StreamingState()
-
- async for chunk in stream:
- if not state.started:
- state.started = True
- yield ResponseCreatedEvent(
- response=response,
- type="response.created",
- )
-
- # The usage is only available in the last chunk
- usage = chunk.usage
-
- if not chunk.choices or not chunk.choices[0].delta:
- continue
-
- delta = chunk.choices[0].delta
-
- # Handle text
- if delta.content:
- if not state.text_content_index_and_output:
- # Initialize a content tracker for streaming text
- state.text_content_index_and_output = (
- 0 if not state.refusal_content_index_and_output else 1,
- ResponseOutputText(
- text="",
- type="output_text",
- annotations=[],
- ),
- )
- # Start a new assistant message stream
- assistant_item = ResponseOutputMessage(
- id=FAKE_RESPONSES_ID,
- content=[],
- role="assistant",
- type="message",
- status="in_progress",
- )
- # Notify consumers of the start of a new output message + first content part
- yield ResponseOutputItemAddedEvent(
- item=assistant_item,
- output_index=0,
- type="response.output_item.added",
- )
- yield ResponseContentPartAddedEvent(
- content_index=state.text_content_index_and_output[0],
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- part=ResponseOutputText(
- text="",
- type="output_text",
- annotations=[],
- ),
- type="response.content_part.added",
- )
- # Emit the delta for this segment of content
- yield ResponseTextDeltaEvent(
- content_index=state.text_content_index_and_output[0],
- delta=delta.content,
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- type="response.output_text.delta",
- )
- # Accumulate the text into the response part
- state.text_content_index_and_output[1].text += delta.content
-
- # Handle refusals (model declines to answer)
- if delta.refusal:
- if not state.refusal_content_index_and_output:
- # Initialize a content tracker for streaming refusal text
- state.refusal_content_index_and_output = (
- 0 if not state.text_content_index_and_output else 1,
- ResponseOutputRefusal(refusal="", type="refusal"),
- )
- # Start a new assistant message if one doesn't exist yet (in-progress)
- assistant_item = ResponseOutputMessage(
- id=FAKE_RESPONSES_ID,
- content=[],
- role="assistant",
- type="message",
- status="in_progress",
- )
- # Notify downstream that assistant message + first content part are starting
- yield ResponseOutputItemAddedEvent(
- item=assistant_item,
- output_index=0,
- type="response.output_item.added",
- )
- yield ResponseContentPartAddedEvent(
- content_index=state.refusal_content_index_and_output[0],
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- # A refusal stream starts with an empty refusal part, not a text part
- part=ResponseOutputRefusal(
- refusal="",
- type="refusal",
- ),
- type="response.content_part.added",
- )
- # Emit the delta for this segment of refusal
- yield ResponseRefusalDeltaEvent(
- content_index=state.refusal_content_index_and_output[0],
- delta=delta.refusal,
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- type="response.refusal.delta",
- )
- # Accumulate the refusal string in the output part
- state.refusal_content_index_and_output[1].refusal += delta.refusal
-
- # Handle tool calls
- # Because we don't know the name of the function until the end of the stream, we'll
- # save everything and yield events at the end
- if delta.tool_calls:
- for tc_delta in delta.tool_calls:
- if tc_delta.index not in state.function_calls:
- state.function_calls[tc_delta.index] = ResponseFunctionToolCall(
- id=FAKE_RESPONSES_ID,
- arguments="",
- name="",
- type="function_call",
- call_id="",
- )
- tc_function = tc_delta.function
-
- state.function_calls[tc_delta.index].arguments += (
- tc_function.arguments if tc_function else ""
- ) or ""
- state.function_calls[tc_delta.index].name += (
- tc_function.name if tc_function else ""
- ) or ""
- state.function_calls[tc_delta.index].call_id += tc_delta.id or ""
-
- function_call_starting_index = 0
- if state.text_content_index_and_output:
- function_call_starting_index += 1
- # Send end event for this content part
- yield ResponseContentPartDoneEvent(
- content_index=state.text_content_index_and_output[0],
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- part=state.text_content_index_and_output[1],
- type="response.content_part.done",
- )
-
- if state.refusal_content_index_and_output:
- function_call_starting_index += 1
- # Send end event for this content part
- yield ResponseContentPartDoneEvent(
- content_index=state.refusal_content_index_and_output[0],
- item_id=FAKE_RESPONSES_ID,
- output_index=0,
- part=state.refusal_content_index_and_output[1],
- type="response.content_part.done",
- )
-
- # Actually send events for the function calls
- for function_call in state.function_calls.values():
- # First, a ResponseOutputItemAdded for the function call
- yield ResponseOutputItemAddedEvent(
- item=ResponseFunctionToolCall(
- id=FAKE_RESPONSES_ID,
- call_id=function_call.call_id,
- arguments=function_call.arguments,
- name=function_call.name,
- type="function_call",
- ),
- output_index=function_call_starting_index,
- type="response.output_item.added",
- )
- # Then, yield the args
- yield ResponseFunctionCallArgumentsDeltaEvent(
- delta=function_call.arguments,
- item_id=FAKE_RESPONSES_ID,
- output_index=function_call_starting_index,
- type="response.function_call_arguments.delta",
- )
- # Finally, the ResponseOutputItemDone
- yield ResponseOutputItemDoneEvent(
- item=ResponseFunctionToolCall(
- id=FAKE_RESPONSES_ID,
- call_id=function_call.call_id,
- arguments=function_call.arguments,
- name=function_call.name,
- type="function_call",
- ),
- output_index=function_call_starting_index,
- type="response.output_item.done",
- )
-
- # Finally, send the Response completed event
- outputs: list[ResponseOutputItem] = []
- if state.text_content_index_and_output or state.refusal_content_index_and_output:
- assistant_msg = ResponseOutputMessage(
- id=FAKE_RESPONSES_ID,
- content=[],
- role="assistant",
- type="message",
- status="completed",
- )
- if state.text_content_index_and_output:
- assistant_msg.content.append(state.text_content_index_and_output[1])
- if state.refusal_content_index_and_output:
- assistant_msg.content.append(state.refusal_content_index_and_output[1])
- outputs.append(assistant_msg)
-
- # send a ResponseOutputItemDone for the assistant message
- yield ResponseOutputItemDoneEvent(
- item=assistant_msg,
- output_index=0,
- type="response.output_item.done",
- )
-
- for function_call in state.function_calls.values():
- outputs.append(function_call)
-
- final_response = response.model_copy(update={"output": outputs, "usage": usage})
-
- yield ResponseCompletedEvent(
- response=final_response,
- type="response.completed",
- )
- if tracing.include_data():
- span_generation.span_data.output = [final_response.model_dump()]
-
- if usage:
- span_generation.span_data.usage = {
- "input_tokens": usage.prompt_tokens,
- "output_tokens": usage.completion_tokens,
- }
-
- @overload
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- span: Span[GenerationSpanData],
- tracing: ModelTracing,
- stream: Literal[True],
- ) -> tuple[Response, AsyncStream[ChatCompletionChunk]]: ...
-
- @overload
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- span: Span[GenerationSpanData],
- tracing: ModelTracing,
- stream: Literal[False],
- ) -> ChatCompletion: ...
-
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- span: Span[GenerationSpanData],
- tracing: ModelTracing,
- stream: bool = False,
- ) -> ChatCompletion | tuple[Response, AsyncStream[ChatCompletionChunk]]:
- converted_messages = _Converter.items_to_messages(input)
-
- if system_instructions:
- converted_messages.insert(
- 0,
- {
- "content": system_instructions,
- "role": "system",
- },
- )
- if tracing.include_data():
- span.span_data.input = converted_messages
-
- parallel_tool_calls = (
- True if model_settings.parallel_tool_calls and tools and len(tools) > 0 else NOT_GIVEN
- )
- tool_choice = _Converter.convert_tool_choice(model_settings.tool_choice)
- response_format = _Converter.convert_response_format(output_schema)
-
- converted_tools = [ToolConverter.to_openai(tool) for tool in tools] if tools else []
-
- for handoff in handoffs:
- converted_tools.append(ToolConverter.convert_handoff_tool(handoff))
-
- if _debug.DONT_LOG_MODEL_DATA:
- logger.debug("Calling LLM")
- else:
- logger.debug(
- f"{json.dumps(converted_messages, indent=2)}\n"
- f"Tools:\n{json.dumps(converted_tools, indent=2)}\n"
- f"Stream: {stream}\n"
- f"Tool choice: {tool_choice}\n"
- f"Response format: {response_format}\n"
- )
-
- ret = await self._get_client().chat.completions.create(
- model=self.model,
- messages=converted_messages,
- tools=converted_tools or NOT_GIVEN,
- temperature=self._non_null_or_not_given(model_settings.temperature),
- top_p=self._non_null_or_not_given(model_settings.top_p),
- frequency_penalty=self._non_null_or_not_given(model_settings.frequency_penalty),
- presence_penalty=self._non_null_or_not_given(model_settings.presence_penalty),
- tool_choice=tool_choice,
- response_format=response_format,
- parallel_tool_calls=parallel_tool_calls,
- stream=stream,
- stream_options={"include_usage": True} if stream else NOT_GIVEN,
- extra_headers=_HEADERS,
- )
-
- if isinstance(ret, ChatCompletion):
- return ret
-
- response = Response(
- id=FAKE_RESPONSES_ID,
- created_at=time.time(),
- model=self.model,
- object="response",
- output=[],
- tool_choice=cast(Literal["auto", "required", "none"], tool_choice)
- if tool_choice != NOT_GIVEN
- else "auto",
- top_p=model_settings.top_p,
- temperature=model_settings.temperature,
- tools=[],
- parallel_tool_calls=parallel_tool_calls or False,
- )
- return response, ret
-
- def _get_client(self) -> AsyncOpenAI:
- if self._client is None:
- self._client = AsyncOpenAI()
- return self._client
-
-
-class _Converter:
- @classmethod
- def convert_tool_choice(
- cls, tool_choice: Literal["auto", "required", "none"] | str | None
- ) -> ChatCompletionToolChoiceOptionParam | NotGiven:
- if tool_choice is None:
- return NOT_GIVEN
- elif tool_choice == "auto":
- return "auto"
- elif tool_choice == "required":
- return "required"
- elif tool_choice == "none":
- return "none"
- else:
- return {
- "type": "function",
- "function": {
- "name": tool_choice,
- },
- }
-
- @classmethod
- def convert_response_format(
- cls, final_output_schema: AgentOutputSchema | None
- ) -> ResponseFormat | NotGiven:
- if not final_output_schema or final_output_schema.is_plain_text():
- return NOT_GIVEN
-
- return {
- "type": "json_schema",
- "json_schema": {
- "name": "final_output",
- "strict": final_output_schema.strict_json_schema,
- "schema": final_output_schema.json_schema(),
- },
- }
-
- @classmethod
- def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
- items: list[TResponseOutputItem] = []
-
- message_item = ResponseOutputMessage(
- id=FAKE_RESPONSES_ID,
- content=[],
- role="assistant",
- type="message",
- status="completed",
- )
- if message.content:
- message_item.content.append(
- ResponseOutputText(text=message.content, type="output_text", annotations=[])
- )
- if message.refusal:
- message_item.content.append(
- ResponseOutputRefusal(refusal=message.refusal, type="refusal")
- )
- if message.audio:
- raise AgentsException("Audio is not currently supported")
-
- if message_item.content:
- items.append(message_item)
-
- if message.tool_calls:
- for tool_call in message.tool_calls:
- items.append(
- ResponseFunctionToolCall(
- id=FAKE_RESPONSES_ID,
- call_id=tool_call.id,
- arguments=tool_call.function.arguments,
- name=tool_call.function.name,
- type="function_call",
- )
- )
-
- return items
-
- @classmethod
- def maybe_easy_input_message(cls, item: Any) -> EasyInputMessageParam | None:
- if not isinstance(item, dict):
- return None
-
- keys = item.keys()
- # EasyInputMessageParam only has these two keys
- if keys != {"content", "role"}:
- return None
-
- role = item.get("role", None)
- if role not in ("user", "assistant", "system", "developer"):
- return None
-
- if "content" not in item:
- return None
-
- return cast(EasyInputMessageParam, item)
-
- @classmethod
- def maybe_input_message(cls, item: Any) -> Message | None:
- if (
- isinstance(item, dict)
- and item.get("type") == "message"
- and item.get("role")
- in (
- "user",
- "system",
- "developer",
- )
- ):
- return cast(Message, item)
-
- return None
-
- @classmethod
- def maybe_file_search_call(cls, item: Any) -> ResponseFileSearchToolCallParam | None:
- if isinstance(item, dict) and item.get("type") == "file_search_call":
- return cast(ResponseFileSearchToolCallParam, item)
- return None
-
- @classmethod
- def maybe_function_tool_call(cls, item: Any) -> ResponseFunctionToolCallParam | None:
- if isinstance(item, dict) and item.get("type") == "function_call":
- return cast(ResponseFunctionToolCallParam, item)
- return None
-
- @classmethod
- def maybe_function_tool_call_output(
- cls,
- item: Any,
- ) -> FunctionCallOutput | None:
- if isinstance(item, dict) and item.get("type") == "function_call_output":
- return cast(FunctionCallOutput, item)
- return None
-
- @classmethod
- def maybe_item_reference(cls, item: Any) -> ItemReference | None:
- if isinstance(item, dict) and item.get("type") == "item_reference":
- return cast(ItemReference, item)
- return None
-
- @classmethod
- def maybe_response_output_message(cls, item: Any) -> ResponseOutputMessageParam | None:
- # ResponseOutputMessage is only used for messages with role assistant
- if (
- isinstance(item, dict)
- and item.get("type") == "message"
- and item.get("role") == "assistant"
- ):
- return cast(ResponseOutputMessageParam, item)
- return None
-
- @classmethod
- def extract_text_content(
- cls, content: str | Iterable[ResponseInputContentParam]
- ) -> str | list[ChatCompletionContentPartTextParam]:
- all_content = cls.extract_all_content(content)
- if isinstance(all_content, str):
- return all_content
- out: list[ChatCompletionContentPartTextParam] = []
- for c in all_content:
- if c.get("type") == "text":
- out.append(cast(ChatCompletionContentPartTextParam, c))
- return out
-
- @classmethod
- def extract_all_content(
- cls, content: str | Iterable[ResponseInputContentParam]
- ) -> str | list[ChatCompletionContentPartParam]:
- if isinstance(content, str):
- return content
- out: list[ChatCompletionContentPartParam] = []
-
- for c in content:
- if isinstance(c, dict) and c.get("type") == "input_text":
- casted_text_param = cast(ResponseInputTextParam, c)
- out.append(
- ChatCompletionContentPartTextParam(
- type="text",
- text=casted_text_param["text"],
- )
- )
- elif isinstance(c, dict) and c.get("type") == "input_image":
- casted_image_param = cast(ResponseInputImageParam, c)
- if "image_url" not in casted_image_param or not casted_image_param["image_url"]:
- raise UserError(
- f"Only image URLs are supported for input_image {casted_image_param}"
- )
- out.append(
- ChatCompletionContentPartImageParam(
- type="image_url",
- image_url={
- "url": casted_image_param["image_url"],
- "detail": casted_image_param["detail"],
- },
- )
- )
-            elif isinstance(c, dict) and c.get("type") == "input_file":
-                raise UserError(f"File uploads are not supported for chat completions: {c}")
-            else:
-                raise UserError(f"Unknown content: {c}")
- return out
-
- @classmethod
- def items_to_messages(
- cls,
- items: str | Iterable[TResponseInputItem],
- ) -> list[ChatCompletionMessageParam]:
- """
- Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
-
- Rules:
- - EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
- - EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
- - EasyInputMessage or InputMessage (role=developer) => ChatCompletionDeveloperMessageParam
- - InputMessage (role=assistant) => Start or flush a ChatCompletionAssistantMessageParam
- - response_output_message => Also produces/flushes a ChatCompletionAssistantMessageParam
- - tool calls get attached to the *current* assistant message, or create one if none.
- - tool outputs => ChatCompletionToolMessageParam
- """
-
- if isinstance(items, str):
- return [
- ChatCompletionUserMessageParam(
- role="user",
- content=items,
- )
- ]
-
- result: list[ChatCompletionMessageParam] = []
- current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
-
- def flush_assistant_message() -> None:
- nonlocal current_assistant_msg
- if current_assistant_msg is not None:
- # The API doesn't support empty arrays for tool_calls
- if not current_assistant_msg.get("tool_calls"):
- del current_assistant_msg["tool_calls"]
- result.append(current_assistant_msg)
- current_assistant_msg = None
-
- def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
- nonlocal current_assistant_msg
- if current_assistant_msg is None:
- current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
- current_assistant_msg["tool_calls"] = []
- return current_assistant_msg
-
- for item in items:
- # 1) Check easy input message
- if easy_msg := cls.maybe_easy_input_message(item):
- role = easy_msg["role"]
- content = easy_msg["content"]
-
- if role == "user":
- flush_assistant_message()
- msg_user: ChatCompletionUserMessageParam = {
- "role": "user",
- "content": cls.extract_all_content(content),
- }
- result.append(msg_user)
- elif role == "system":
- flush_assistant_message()
- msg_system: ChatCompletionSystemMessageParam = {
- "role": "system",
- "content": cls.extract_text_content(content),
- }
- result.append(msg_system)
- elif role == "developer":
- flush_assistant_message()
- msg_developer: ChatCompletionDeveloperMessageParam = {
- "role": "developer",
- "content": cls.extract_text_content(content),
- }
- result.append(msg_developer)
- else:
- raise UserError(f"Unexpected role in easy_input_message: {role}")
-
- # 2) Check input message
- elif in_msg := cls.maybe_input_message(item):
- role = in_msg["role"]
- content = in_msg["content"]
- flush_assistant_message()
-
- if role == "user":
- msg_user = {
- "role": "user",
- "content": cls.extract_all_content(content),
- }
- result.append(msg_user)
- elif role == "system":
- msg_system = {
- "role": "system",
- "content": cls.extract_text_content(content),
- }
- result.append(msg_system)
- elif role == "developer":
- msg_developer = {
- "role": "developer",
- "content": cls.extract_text_content(content),
- }
- result.append(msg_developer)
- else:
- raise UserError(f"Unexpected role in input_message: {role}")
-
- # 3) response output message => assistant
- elif resp_msg := cls.maybe_response_output_message(item):
- flush_assistant_message()
- new_asst = ChatCompletionAssistantMessageParam(role="assistant")
- contents = resp_msg["content"]
-
- text_segments = []
- for c in contents:
- if c["type"] == "output_text":
- text_segments.append(c["text"])
- elif c["type"] == "refusal":
- new_asst["refusal"] = c["refusal"]
- elif c["type"] == "output_audio":
-                    # Can't handle this, because chat completions expects an audio ID, which we don't have
- raise UserError(
- f"Only audio IDs are supported for chat completions, but got: {c}"
- )
- else:
- raise UserError(f"Unknown content type in ResponseOutputMessage: {c}")
-
- if text_segments:
- combined = "\n".join(text_segments)
- new_asst["content"] = combined
-
- new_asst["tool_calls"] = []
- current_assistant_msg = new_asst
-
- # 4) function/file-search calls => attach to assistant
- elif file_search := cls.maybe_file_search_call(item):
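-            # Chat Completions has no hosted file-search tool, so the call is recorded as a synthetic function tool call.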
- asst = ensure_assistant_message()
- tool_calls = list(asst.get("tool_calls", []))
- new_tool_call = ChatCompletionMessageToolCallParam(
- id=file_search["id"],
- type="function",
- function={
- "name": "file_search_call",
- "arguments": json.dumps(
- {
- "queries": file_search.get("queries", []),
- "status": file_search.get("status"),
- }
- ),
- },
- )
- tool_calls.append(new_tool_call)
- asst["tool_calls"] = tool_calls
-
- elif func_call := cls.maybe_function_tool_call(item):
- asst = ensure_assistant_message()
- tool_calls = list(asst.get("tool_calls", []))
- new_tool_call = ChatCompletionMessageToolCallParam(
- id=func_call["call_id"],
- type="function",
- function={
- "name": func_call["name"],
- "arguments": func_call["arguments"],
- },
- )
- tool_calls.append(new_tool_call)
- asst["tool_calls"] = tool_calls
- # 5) function call output => tool message
- elif func_output := cls.maybe_function_tool_call_output(item):
- flush_assistant_message()
- msg: ChatCompletionToolMessageParam = {
- "role": "tool",
- "tool_call_id": func_output["call_id"],
- "content": func_output["output"],
- }
- result.append(msg)
-
- # 6) item reference => handle or raise
- elif item_ref := cls.maybe_item_reference(item):
- raise UserError(
- f"Encountered an item_reference, which is not supported: {item_ref}"
- )
-
- # 7) If we haven't recognized it => fail or ignore
- else:
- raise UserError(f"Unhandled item type or structure: {item}")
-
- flush_assistant_message()
- return result
-
-
-class ToolConverter:
- @classmethod
- def to_openai(cls, tool: Tool) -> ChatCompletionToolParam:
- if isinstance(tool, FunctionTool):
- return {
- "type": "function",
- "function": {
- "name": tool.name,
- "description": tool.description or "",
- "parameters": tool.params_json_schema,
- },
- }
-
- raise UserError(
- f"Hosted tools are not supported with the ChatCompletions API. FGot tool type: "
- f"{type(tool)}, tool: {tool}"
- )
-
- @classmethod
- def convert_handoff_tool(cls, handoff: Handoff[Any]) -> ChatCompletionToolParam:
- return {
- "type": "function",
- "function": {
- "name": handoff.tool_name,
- "description": handoff.tool_description,
- "parameters": handoff.input_json_schema,
- },
- }
diff --git a/tests/src/agents/models/openai_provider.py b/tests/src/agents/models/openai_provider.py
deleted file mode 100644
index 51946638..00000000
--- a/tests/src/agents/models/openai_provider.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import annotations
-
-import httpx
-from openai import AsyncOpenAI, DefaultAsyncHttpxClient
-
-from . import _openai_shared
-from .interface import Model, ModelProvider
-from .openai_chatcompletions import OpenAIChatCompletionsModel
-from .openai_responses import OpenAIResponsesModel
-
-DEFAULT_MODEL: str = "gpt-4o"
-
-
-_http_client: httpx.AsyncClient | None = None
-
-
-# If we create a new httpx client for each request, that would mean no sharing of connection pools,
-# which would mean worse latency and resource usage. So, we share the client across requests.
-def shared_http_client() -> httpx.AsyncClient:
- global _http_client
- if _http_client is None:
- _http_client = DefaultAsyncHttpxClient()
- return _http_client
-
-
-class OpenAIProvider(ModelProvider):
- def __init__(
- self,
- *,
- api_key: str | None = None,
- base_url: str | None = None,
- openai_client: AsyncOpenAI | None = None,
- organization: str | None = None,
- project: str | None = None,
- use_responses: bool | None = None,
- ) -> None:
- if openai_client is not None:
- assert api_key is None and base_url is None, (
- "Don't provide api_key or base_url if you provide openai_client"
- )
- self._client = openai_client
- else:
- self._client = _openai_shared.get_default_openai_client() or AsyncOpenAI(
- api_key=api_key or _openai_shared.get_default_openai_key(),
- base_url=base_url,
- organization=organization,
- project=project,
- http_client=shared_http_client(),
- )
-
- self._is_openai_model = self._client.base_url.host.startswith("api.openai.com")
- if use_responses is not None:
- self._use_responses = use_responses
- else:
- self._use_responses = _openai_shared.get_use_responses_by_default()
-
- def get_model(self, model_name: str | None) -> Model:
- if model_name is None:
- model_name = DEFAULT_MODEL
-
- return (
- OpenAIResponsesModel(model=model_name, openai_client=self._client)
- if self._use_responses
- else OpenAIChatCompletionsModel(model=model_name, openai_client=self._client)
- )
diff --git a/tests/src/agents/models/openai_responses.py b/tests/src/agents/models/openai_responses.py
deleted file mode 100644
index a10d7b98..00000000
--- a/tests/src/agents/models/openai_responses.py
+++ /dev/null
@@ -1,384 +0,0 @@
-from __future__ import annotations
-
-import json
-from collections.abc import AsyncIterator
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Literal, overload
-
-from openai import NOT_GIVEN, AsyncOpenAI, AsyncStream, NotGiven
-from openai.types import ChatModel
-from openai.types.responses import (
- Response,
- ResponseCompletedEvent,
- ResponseStreamEvent,
- ResponseTextConfigParam,
- ToolParam,
- WebSearchToolParam,
- response_create_params,
-)
-
-from .. import _debug
-from ..agent_output import AgentOutputSchema
-from ..exceptions import UserError
-from ..handoffs import Handoff
-from ..items import ItemHelpers, ModelResponse, TResponseInputItem
-from ..logger import logger
-from ..tool import ComputerTool, FileSearchTool, FunctionTool, Tool, WebSearchTool
-from ..tracing import SpanError, response_span
-from ..usage import Usage
-from ..version import __version__
-from .interface import Model, ModelTracing
-
-if TYPE_CHECKING:
- from ..model_settings import ModelSettings
-
-
-_USER_AGENT = f"Agents/Python {__version__}"
-_HEADERS = {"User-Agent": _USER_AGENT}
-
-# From the Responses API
-IncludeLiteral = Literal[
- "file_search_call.results",
- "message.input_image.image_url",
- "computer_call_output.output.image_url",
-]
-
-
-class OpenAIResponsesModel(Model):
- """
- Implementation of `Model` that uses the OpenAI Responses API.
- """
-
- def __init__(
- self,
- model: str | ChatModel,
- openai_client: AsyncOpenAI,
- ) -> None:
- self.model = model
- self._client = openai_client
-
- def _non_null_or_not_given(self, value: Any) -> Any:
- return value if value is not None else NOT_GIVEN
-
- async def get_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> ModelResponse:
- with response_span(disabled=tracing.is_disabled()) as span_response:
- try:
- response = await self._fetch_response(
- system_instructions,
- input,
- model_settings,
- tools,
- output_schema,
- handoffs,
- stream=False,
- )
-
- if _debug.DONT_LOG_MODEL_DATA:
- logger.debug("LLM responsed")
- else:
- logger.debug(
- "LLM resp:\n"
- f"{json.dumps([x.model_dump() for x in response.output], indent=2)}\n"
- )
-
- usage = (
- Usage(
- requests=1,
- input_tokens=response.usage.input_tokens,
- output_tokens=response.usage.output_tokens,
- total_tokens=response.usage.total_tokens,
- )
- if response.usage
- else Usage()
- )
-
- if tracing.include_data():
- span_response.span_data.response = response
- span_response.span_data.input = input
- except Exception as e:
- span_response.set_error(
- SpanError(
- message="Error getting response",
- data={
- "error": str(e) if tracing.include_data() else e.__class__.__name__,
- },
- )
- )
- logger.error(f"Error getting response: {e}")
- raise
-
- return ModelResponse(
- output=response.output,
- usage=usage,
- referenceable_id=response.id,
- )
-
- async def stream_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- tracing: ModelTracing,
- ) -> AsyncIterator[ResponseStreamEvent]:
- """
- Yields a partial message as it is generated, as well as the usage information.
- """
- with response_span(disabled=tracing.is_disabled()) as span_response:
- try:
- stream = await self._fetch_response(
- system_instructions,
- input,
- model_settings,
- tools,
- output_schema,
- handoffs,
- stream=True,
- )
-
- final_response: Response | None = None
-
- async for chunk in stream:
- if isinstance(chunk, ResponseCompletedEvent):
- final_response = chunk.response
- yield chunk
-
- if final_response and tracing.include_data():
- span_response.span_data.response = final_response
- span_response.span_data.input = input
-
- except Exception as e:
- span_response.set_error(
- SpanError(
- message="Error streaming response",
- data={
- "error": str(e) if tracing.include_data() else e.__class__.__name__,
- },
- )
- )
- logger.error(f"Error streaming response: {e}")
- raise
-
- @overload
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- stream: Literal[True],
- ) -> AsyncStream[ResponseStreamEvent]: ...
-
- @overload
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- stream: Literal[False],
- ) -> Response: ...
-
- async def _fetch_response(
- self,
- system_instructions: str | None,
- input: str | list[TResponseInputItem],
- model_settings: ModelSettings,
- tools: list[Tool],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- stream: Literal[True] | Literal[False] = False,
- ) -> Response | AsyncStream[ResponseStreamEvent]:
- list_input = ItemHelpers.input_to_new_input_list(input)
-
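-        # Only request parallel tool calls when tools are actually provided; otherwise leave the param unset.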
- parallel_tool_calls = (
- True if model_settings.parallel_tool_calls and tools and len(tools) > 0 else NOT_GIVEN
- )
-
- tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
- converted_tools = Converter.convert_tools(tools, handoffs)
- response_format = Converter.get_response_format(output_schema)
-
- if _debug.DONT_LOG_MODEL_DATA:
- logger.debug("Calling LLM")
- else:
- logger.debug(
- f"Calling LLM {self.model} with input:\n"
- f"{json.dumps(list_input, indent=2)}\n"
- f"Tools:\n{json.dumps(converted_tools.tools, indent=2)}\n"
- f"Stream: {stream}\n"
- f"Tool choice: {tool_choice}\n"
- f"Response format: {response_format}\n"
- )
-
- return await self._client.responses.create(
- instructions=self._non_null_or_not_given(system_instructions),
- model=self.model,
- input=list_input,
- include=converted_tools.includes,
- tools=converted_tools.tools,
- temperature=self._non_null_or_not_given(model_settings.temperature),
- top_p=self._non_null_or_not_given(model_settings.top_p),
- truncation=self._non_null_or_not_given(model_settings.truncation),
- tool_choice=tool_choice,
- parallel_tool_calls=parallel_tool_calls,
- stream=stream,
- extra_headers=_HEADERS,
- text=response_format,
- )
-
- def _get_client(self) -> AsyncOpenAI:
- if self._client is None:
- self._client = AsyncOpenAI()
- return self._client
-
-
-@dataclass
-class ConvertedTools:
- tools: list[ToolParam]
- includes: list[IncludeLiteral]
-
-
-class Converter:
- @classmethod
- def convert_tool_choice(
- cls, tool_choice: Literal["auto", "required", "none"] | str | None
- ) -> response_create_params.ToolChoice | NotGiven:
- if tool_choice is None:
- return NOT_GIVEN
- elif tool_choice == "required":
- return "required"
- elif tool_choice == "auto":
- return "auto"
- elif tool_choice == "none":
- return "none"
- elif tool_choice == "file_search":
- return {
- "type": "file_search",
- }
- elif tool_choice == "web_search_preview":
- return {
- "type": "web_search_preview",
- }
- elif tool_choice == "computer_use_preview":
- return {
- "type": "computer_use_preview",
- }
- else:
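-            # Fall through: treat the string as the name of a function tool to force.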
- return {
- "type": "function",
- "name": tool_choice,
- }
-
- @classmethod
- def get_response_format(
- cls, output_schema: AgentOutputSchema | None
- ) -> ResponseTextConfigParam | NotGiven:
- if output_schema is None or output_schema.is_plain_text():
- return NOT_GIVEN
- else:
- return {
- "format": {
- "type": "json_schema",
- "name": "final_output",
- "schema": output_schema.json_schema(),
- "strict": output_schema.strict_json_schema,
- }
- }
-
- @classmethod
- def convert_tools(
- cls,
- tools: list[Tool],
- handoffs: list[Handoff[Any]],
- ) -> ConvertedTools:
- converted_tools: list[ToolParam] = []
- includes: list[IncludeLiteral] = []
-
- computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
- if len(computer_tools) > 1:
- raise UserError(f"You can only provide one computer tool. Got {len(computer_tools)}")
-
- for tool in tools:
- converted_tool, include = cls._convert_tool(tool)
- converted_tools.append(converted_tool)
- if include:
- includes.append(include)
-
- for handoff in handoffs:
- converted_tools.append(cls._convert_handoff_tool(handoff))
-
- return ConvertedTools(tools=converted_tools, includes=includes)
-
- @classmethod
- def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
- """Returns converted tool and includes"""
-
- if isinstance(tool, FunctionTool):
- converted_tool: ToolParam = {
- "name": tool.name,
- "parameters": tool.params_json_schema,
- "strict": tool.strict_json_schema,
- "type": "function",
- "description": tool.description,
- }
- includes: IncludeLiteral | None = None
- elif isinstance(tool, WebSearchTool):
- ws: WebSearchToolParam = {
- "type": "web_search_preview",
- "user_location": tool.user_location,
- "search_context_size": tool.search_context_size,
- }
- converted_tool = ws
- includes = None
- elif isinstance(tool, FileSearchTool):
- converted_tool = {
- "type": "file_search",
- "vector_store_ids": tool.vector_store_ids,
- }
- if tool.max_num_results:
- converted_tool["max_num_results"] = tool.max_num_results
- if tool.ranking_options:
- converted_tool["ranking_options"] = tool.ranking_options
- if tool.filters:
- converted_tool["filters"] = tool.filters
-
- includes = "file_search_call.results" if tool.include_search_results else None
- elif isinstance(tool, ComputerTool):
- converted_tool = {
- "type": "computer-preview",
- "environment": tool.computer.environment,
- "display_width": tool.computer.dimensions[0],
- "display_height": tool.computer.dimensions[1],
- }
- includes = None
-
- else:
- raise UserError(f"Unknown tool type: {type(tool)}, tool")
-
- return converted_tool, includes
-
- @classmethod
- def _convert_handoff_tool(cls, handoff: Handoff) -> ToolParam:
- return {
- "name": handoff.tool_name,
- "parameters": handoff.input_json_schema,
- "strict": handoff.strict_json_schema,
- "type": "function",
- "description": handoff.tool_description,
- }
diff --git a/tests/src/agents/result.py b/tests/src/agents/result.py
deleted file mode 100644
index 56838273..00000000
--- a/tests/src/agents/result.py
+++ /dev/null
@@ -1,220 +0,0 @@
-from __future__ import annotations
-
-import abc
-import asyncio
-from collections.abc import AsyncIterator
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, cast
-
-from typing_extensions import TypeVar
-
-from ._run_impl import QueueCompleteSentinel
-from .agent import Agent
-from .agent_output import AgentOutputSchema
-from .exceptions import InputGuardrailTripwireTriggered, MaxTurnsExceeded
-from .guardrail import InputGuardrailResult, OutputGuardrailResult
-from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
-from .logger import logger
-from .stream_events import StreamEvent
-from .tracing import Trace
-
-if TYPE_CHECKING:
- from ._run_impl import QueueCompleteSentinel
- from .agent import Agent
-
-T = TypeVar("T")
-
-
-@dataclass
-class RunResultBase(abc.ABC):
- input: str | list[TResponseInputItem]
- """The original input items i.e. the items before run() was called. This may be a mutated
- version of the input, if there are handoff input filters that mutate the input.
- """
-
- new_items: list[RunItem]
- """The new items generated during the agent run. These include things like new messages, tool
- calls and their outputs, etc.
- """
-
- raw_responses: list[ModelResponse]
- """The raw LLM responses generated by the model during the agent run."""
-
- final_output: Any
- """The output of the last agent."""
-
- input_guardrail_results: list[InputGuardrailResult]
- """Guardrail results for the input messages."""
-
- output_guardrail_results: list[OutputGuardrailResult]
- """Guardrail results for the final output of the agent."""
-
- @property
- @abc.abstractmethod
- def last_agent(self) -> Agent[Any]:
- """The last agent that was run."""
-
- def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
- """A convenience method to cast the final output to a specific type. By default, the cast
- is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a
- TypeError if the final output is not of the given type.
-
- Args:
- cls: The type to cast the final output to.
- raise_if_incorrect_type: If True, we'll raise a TypeError if the final output is not of
- the given type.
-
- Returns:
- The final output casted to the given type.
- """
- if raise_if_incorrect_type and not isinstance(self.final_output, cls):
- raise TypeError(f"Final output is not of type {cls.__name__}")
-
- return cast(T, self.final_output)
-
- def to_input_list(self) -> list[TResponseInputItem]:
- """Creates a new input list, merging the original input with all the new items generated."""
- original_items: list[TResponseInputItem] = ItemHelpers.input_to_new_input_list(self.input)
- new_items = [item.to_input_item() for item in self.new_items]
-
- return original_items + new_items
-
-
-@dataclass
-class RunResult(RunResultBase):
- _last_agent: Agent[Any]
-
- @property
- def last_agent(self) -> Agent[Any]:
- """The last agent that was run."""
- return self._last_agent
-
-
-@dataclass
-class RunResultStreaming(RunResultBase):
- """The result of an agent run in streaming mode. You can use the `stream_events` method to
- receive semantic events as they are generated.
-
- The streaming method will raise:
- - A MaxTurnsExceeded exception if the agent exceeds the max_turns limit.
- - A GuardrailTripwireTriggered exception if a guardrail is tripped.
- """
-
- current_agent: Agent[Any]
- """The current agent that is running."""
-
- current_turn: int
- """The current turn number."""
-
- max_turns: int
- """The maximum number of turns the agent can run for."""
-
- final_output: Any
- """The final output of the agent. This is None until the agent has finished running."""
-
- _current_agent_output_schema: AgentOutputSchema | None = field(repr=False)
-
- _trace: Trace | None = field(repr=False)
-
- is_complete: bool = False
- """Whether the agent has finished running."""
-
- # Queues that the background run_loop writes to
- _event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field(
- default_factory=asyncio.Queue, repr=False
- )
- _input_guardrail_queue: asyncio.Queue[InputGuardrailResult] = field(
- default_factory=asyncio.Queue, repr=False
- )
-
- # Store the asyncio tasks that we're waiting on
- _run_impl_task: asyncio.Task[Any] | None = field(default=None, repr=False)
- _input_guardrails_task: asyncio.Task[Any] | None = field(default=None, repr=False)
- _output_guardrails_task: asyncio.Task[Any] | None = field(default=None, repr=False)
- _stored_exception: Exception | None = field(default=None, repr=False)
-
- @property
- def last_agent(self) -> Agent[Any]:
- """The last agent that was run. Updates as the agent run progresses, so the true last agent
- is only available after the agent run is complete.
- """
- return self.current_agent
-
- async def stream_events(self) -> AsyncIterator[StreamEvent]:
- """Stream deltas for new items as they are generated. We're using the types from the
- OpenAI Responses API, so these are semantic events: each event has a `type` field that
- describes the type of the event, along with the data for that event.
-
- This will raise:
- - A MaxTurnsExceeded exception if the agent exceeds the max_turns limit.
- - A GuardrailTripwireTriggered exception if a guardrail is tripped.
- """
- while True:
- self._check_errors()
- if self._stored_exception:
- logger.debug("Breaking due to stored exception")
- self.is_complete = True
- break
-
- if self.is_complete and self._event_queue.empty():
- break
-
- try:
- item = await self._event_queue.get()
- except asyncio.CancelledError:
- break
-
- if isinstance(item, QueueCompleteSentinel):
- self._event_queue.task_done()
- # Check for errors, in case the queue was completed due to an exception
- self._check_errors()
- break
-
- yield item
- self._event_queue.task_done()
-
- if self._trace:
- self._trace.finish(reset_current=True)
-
- self._cleanup_tasks()
-
- if self._stored_exception:
- raise self._stored_exception
-
- def _check_errors(self):
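-        # Surface failures from the background tasks; if several checks find an exception, the last one wins.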
- if self.current_turn > self.max_turns:
- self._stored_exception = MaxTurnsExceeded(f"Max turns ({self.max_turns}) exceeded")
-
- # Fetch all the completed guardrail results from the queue and raise if needed
- while not self._input_guardrail_queue.empty():
- guardrail_result = self._input_guardrail_queue.get_nowait()
- if guardrail_result.output.tripwire_triggered:
- self._stored_exception = InputGuardrailTripwireTriggered(guardrail_result)
-
- # Check the tasks for any exceptions
- if self._run_impl_task and self._run_impl_task.done():
- exc = self._run_impl_task.exception()
- if exc and isinstance(exc, Exception):
- self._stored_exception = exc
-
- if self._input_guardrails_task and self._input_guardrails_task.done():
- exc = self._input_guardrails_task.exception()
- if exc and isinstance(exc, Exception):
- self._stored_exception = exc
-
- if self._output_guardrails_task and self._output_guardrails_task.done():
- exc = self._output_guardrails_task.exception()
- if exc and isinstance(exc, Exception):
- self._stored_exception = exc
-
- def _cleanup_tasks(self):
- if self._run_impl_task and not self._run_impl_task.done():
- self._run_impl_task.cancel()
-
- if self._input_guardrails_task and not self._input_guardrails_task.done():
- self._input_guardrails_task.cancel()
-
- if self._output_guardrails_task and not self._output_guardrails_task.done():
- self._output_guardrails_task.cancel()
diff --git a/tests/src/agents/run.py b/tests/src/agents/run.py
deleted file mode 100644
index dfff7e38..00000000
--- a/tests/src/agents/run.py
+++ /dev/null
@@ -1,904 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import copy
-from dataclasses import dataclass, field
-from typing import Any, cast
-
-from openai.types.responses import ResponseCompletedEvent
-
-from . import Model, _utils
-from ._run_impl import (
- NextStepFinalOutput,
- NextStepHandoff,
- NextStepRunAgain,
- QueueCompleteSentinel,
- RunImpl,
- SingleStepResult,
- TraceCtxManager,
- get_model_tracing_impl,
-)
-from .agent import Agent
-from .agent_output import AgentOutputSchema
-from .exceptions import (
- AgentsException,
- InputGuardrailTripwireTriggered,
- MaxTurnsExceeded,
- ModelBehaviorError,
- OutputGuardrailTripwireTriggered,
-)
-from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult
-from .handoffs import Handoff, HandoffInputFilter, handoff
-from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
-from .lifecycle import RunHooks
-from .logger import logger
-from .model_settings import ModelSettings
-from .models.interface import ModelProvider
-from .models.openai_provider import OpenAIProvider
-from .result import RunResult, RunResultStreaming
-from .run_context import RunContextWrapper, TContext
-from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent
-from .tracing import Span, SpanError, agent_span, get_current_trace, trace
-from .tracing.span_data import AgentSpanData
-from .usage import Usage
-
-DEFAULT_MAX_TURNS = 10
-
-
-@dataclass
-class RunConfig:
- """Configures settings for the entire agent run."""
-
- model: str | Model | None = None
- """The model to use for the entire agent run. If set, will override the model set on every
- agent. The model_provider passed in below must be able to resolve this model name.
- """
-
- model_provider: ModelProvider = field(default_factory=OpenAIProvider)
- """The model provider to use when looking up string model names. Defaults to OpenAI."""
-
- model_settings: ModelSettings | None = None
- """Configure global model settings. Any non-null values will override the agent-specific model
- settings.
- """
-
- handoff_input_filter: HandoffInputFilter | None = None
- """A global input filter to apply to all handoffs. If `Handoff.input_filter` is set, then that
- will take precedence. The input filter allows you to edit the inputs that are sent to the new
- agent. See the documentation in `Handoff.input_filter` for more details.
- """
-
- input_guardrails: list[InputGuardrail[Any]] | None = None
- """A list of input guardrails to run on the initial run input."""
-
- output_guardrails: list[OutputGuardrail[Any]] | None = None
- """A list of output guardrails to run on the final output of the run."""
-
- tracing_disabled: bool = False
- """Whether tracing is disabled for the agent run. If disabled, we will not trace the agent run.
- """
-
- trace_include_sensitive_data: bool = True
- """Whether we include potentially sensitive data (for example: inputs/outputs of tool calls or
- LLM generations) in traces. If False, we'll still create spans for these events, but the
- sensitive data will not be included.
- """
-
- workflow_name: str = "Agent workflow"
- """The name of the run, used for tracing. Should be a logical name for the run, like
- "Code generation workflow" or "Customer support agent".
- """
-
- trace_id: str | None = None
- """A custom trace ID to use for tracing. If not provided, we will generate a new trace ID."""
-
- group_id: str | None = None
- """
- A grouping identifier to use for tracing, to link multiple traces from the same conversation
- or process. For example, you might use a chat thread ID.
- """
-
- trace_metadata: dict[str, Any] | None = None
- """
- An optional dictionary of additional metadata to include with the trace.
- """
-
-
-class Runner:
- @classmethod
- async def run(
- cls,
- starting_agent: Agent[TContext],
- input: str | list[TResponseInputItem],
- *,
- context: TContext | None = None,
- max_turns: int = DEFAULT_MAX_TURNS,
- hooks: RunHooks[TContext] | None = None,
- run_config: RunConfig | None = None,
- ) -> RunResult:
- """Run a workflow starting at the given agent. The agent will run in a loop until a final
- output is generated. The loop runs like so:
- 1. The agent is invoked with the given input.
- 2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`), the loop terminates.
- 3. If there's a handoff, we run the loop again, with the new agent.
- 4. Else, we run tool calls (if any), and re-run the loop.
-
- In two cases, the agent may raise an exception:
- 1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
- 2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-
- Note that only the first agent's input guardrails are run.
-
- Args:
- starting_agent: The starting agent to run.
- input: The initial input to the agent. You can pass a single string for a user message,
- or a list of input items.
- context: The context to run the agent with.
- max_turns: The maximum number of turns to run the agent for. A turn is defined as one
- AI invocation (including any tool calls that might occur).
- hooks: An object that receives callbacks on various lifecycle events.
- run_config: Global settings for the entire agent run.
-
- Returns:
- A run result containing all the inputs, guardrail results and the output of the last
- agent. Agents may perform handoffs, so we don't know the specific type of the output.
- """
- if hooks is None:
- hooks = RunHooks[Any]()
- if run_config is None:
- run_config = RunConfig()
-
- with TraceCtxManager(
- workflow_name=run_config.workflow_name,
- trace_id=run_config.trace_id,
- group_id=run_config.group_id,
- metadata=run_config.trace_metadata,
- disabled=run_config.tracing_disabled,
- ):
- current_turn = 0
- original_input: str | list[TResponseInputItem] = copy.deepcopy(input)
- generated_items: list[RunItem] = []
- model_responses: list[ModelResponse] = []
-
- context_wrapper: RunContextWrapper[TContext] = RunContextWrapper(
- context=context, # type: ignore
- )
-
- input_guardrail_results: list[InputGuardrailResult] = []
-
- current_span: Span[AgentSpanData] | None = None
- current_agent = starting_agent
- should_run_agent_start_hooks = True
-
- try:
- while True:
- # Start an agent span if we don't have one. This span is ended if the current
- # agent changes, or if the agent loop ends.
- if current_span is None:
- handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
- tool_names = [t.name for t in current_agent.tools]
- if output_schema := cls._get_output_schema(current_agent):
- output_type_name = output_schema.output_type_name()
- else:
- output_type_name = "str"
-
- current_span = agent_span(
- name=current_agent.name,
- handoffs=handoff_names,
- tools=tool_names,
- output_type=output_type_name,
- )
- current_span.start(mark_as_current=True)
-
- current_turn += 1
- if current_turn > max_turns:
- _utils.attach_error_to_span(
- current_span,
- SpanError(
- message="Max turns exceeded",
- data={"max_turns": max_turns},
- ),
- )
- raise MaxTurnsExceeded(f"Max turns ({max_turns}) exceeded")
-
- logger.debug(
- f"Running agent {current_agent.name} (turn {current_turn})",
- )
-
- if current_turn == 1:
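-                        # Guardrails only inspect the original input, so they can run concurrently with the first turn.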
- input_guardrail_results, turn_result = await asyncio.gather(
- cls._run_input_guardrails(
- starting_agent,
- starting_agent.input_guardrails
- + (run_config.input_guardrails or []),
- copy.deepcopy(input),
- context_wrapper,
- ),
- cls._run_single_turn(
- agent=current_agent,
- original_input=original_input,
- generated_items=generated_items,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- should_run_agent_start_hooks=should_run_agent_start_hooks,
- ),
- )
- else:
- turn_result = await cls._run_single_turn(
- agent=current_agent,
- original_input=original_input,
- generated_items=generated_items,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- should_run_agent_start_hooks=should_run_agent_start_hooks,
- )
- should_run_agent_start_hooks = False
-
- model_responses.append(turn_result.model_response)
- original_input = turn_result.original_input
- generated_items = turn_result.generated_items
-
- if isinstance(turn_result.next_step, NextStepFinalOutput):
- output_guardrail_results = await cls._run_output_guardrails(
- current_agent.output_guardrails + (run_config.output_guardrails or []),
- current_agent,
- turn_result.next_step.output,
- context_wrapper,
- )
- return RunResult(
- input=original_input,
- new_items=generated_items,
- raw_responses=model_responses,
- final_output=turn_result.next_step.output,
- _last_agent=current_agent,
- input_guardrail_results=input_guardrail_results,
- output_guardrail_results=output_guardrail_results,
- )
- elif isinstance(turn_result.next_step, NextStepHandoff):
- current_agent = cast(Agent[TContext], turn_result.next_step.new_agent)
- current_span.finish(reset_current=True)
- current_span = None
- should_run_agent_start_hooks = True
- elif isinstance(turn_result.next_step, NextStepRunAgain):
- pass
- else:
- raise AgentsException(
- f"Unknown next step type: {type(turn_result.next_step)}"
- )
- finally:
- if current_span:
- current_span.finish(reset_current=True)
-
- @classmethod
- def run_sync(
- cls,
- starting_agent: Agent[TContext],
- input: str | list[TResponseInputItem],
- *,
- context: TContext | None = None,
- max_turns: int = DEFAULT_MAX_TURNS,
- hooks: RunHooks[TContext] | None = None,
- run_config: RunConfig | None = None,
- ) -> RunResult:
- """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
- `run` method, so it will not work if there's already an event loop (e.g. inside an async
- function, or in a Jupyter notebook or async context like FastAPI). For those cases, use
- the `run` method instead.
-
- The agent will run in a loop until a final output is generated. The loop runs like so:
- 1. The agent is invoked with the given input.
- 2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`), the loop terminates.
- 3. If there's a handoff, we run the loop again, with the new agent.
- 4. Else, we run tool calls (if any), and re-run the loop.
-
- In two cases, the agent may raise an exception:
- 1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
- 2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-
- Note that only the first agent's input guardrails are run.
-
- Args:
- starting_agent: The starting agent to run.
- input: The initial input to the agent. You can pass a single string for a user message,
- or a list of input items.
- context: The context to run the agent with.
- max_turns: The maximum number of turns to run the agent for. A turn is defined as one
- AI invocation (including any tool calls that might occur).
- hooks: An object that receives callbacks on various lifecycle events.
- run_config: Global settings for the entire agent run.
-
- Returns:
- A run result containing all the inputs, guardrail results and the output of the last
- agent. Agents may perform handoffs, so we don't know the specific type of the output.
- """
- return asyncio.get_event_loop().run_until_complete(
- cls.run(
- starting_agent,
- input,
- context=context,
- max_turns=max_turns,
- hooks=hooks,
- run_config=run_config,
- )
- )
-
- @classmethod
- def run_streamed(
- cls,
- starting_agent: Agent[TContext],
- input: str | list[TResponseInputItem],
- context: TContext | None = None,
- max_turns: int = DEFAULT_MAX_TURNS,
- hooks: RunHooks[TContext] | None = None,
- run_config: RunConfig | None = None,
- ) -> RunResultStreaming:
- """Run a workflow starting at the given agent in streaming mode. The returned result object
- contains a method you can use to stream semantic events as they are generated.
-
- The agent will run in a loop until a final output is generated. The loop runs like so:
- 1. The agent is invoked with the given input.
- 2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`), the loop terminates.
- 3. If there's a handoff, we run the loop again, with the new agent.
- 4. Else, we run tool calls (if any), and re-run the loop.
-
- In two cases, the agent may raise an exception:
- 1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
- 2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-
- Note that only the first agent's input guardrails are run.
-
- Args:
- starting_agent: The starting agent to run.
- input: The initial input to the agent. You can pass a single string for a user message,
- or a list of input items.
- context: The context to run the agent with.
- max_turns: The maximum number of turns to run the agent for. A turn is defined as one
- AI invocation (including any tool calls that might occur).
- hooks: An object that receives callbacks on various lifecycle events.
- run_config: Global settings for the entire agent run.
-
- Returns:
- A result object that contains data about the run, as well as a method to stream events.
- """
- if hooks is None:
- hooks = RunHooks[Any]()
- if run_config is None:
- run_config = RunConfig()
-
- # If there's already a trace, we don't create a new one. In addition, we can't end the
- # trace here, because the actual work is done in `stream_events` and this method ends
- # before that.
- new_trace = (
- None
- if get_current_trace()
- else trace(
- workflow_name=run_config.workflow_name,
- trace_id=run_config.trace_id,
- group_id=run_config.group_id,
- metadata=run_config.trace_metadata,
- disabled=run_config.tracing_disabled,
- )
- )
- # Need to start the trace here, because the current trace contextvar is captured at
- # asyncio.create_task time
- if new_trace:
- new_trace.start(mark_as_current=True)
-
- output_schema = cls._get_output_schema(starting_agent)
- context_wrapper: RunContextWrapper[TContext] = RunContextWrapper(
- context=context # type: ignore
- )
-
- streamed_result = RunResultStreaming(
- input=copy.deepcopy(input),
- new_items=[],
- current_agent=starting_agent,
- raw_responses=[],
- final_output=None,
- is_complete=False,
- current_turn=0,
- max_turns=max_turns,
- input_guardrail_results=[],
- output_guardrail_results=[],
- _current_agent_output_schema=output_schema,
- _trace=new_trace,
- )
-
- # Kick off the actual agent loop in the background and return the streamed result object.
- streamed_result._run_impl_task = asyncio.create_task(
- cls._run_streamed_impl(
- starting_input=input,
- streamed_result=streamed_result,
- starting_agent=starting_agent,
- max_turns=max_turns,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- )
- )
- return streamed_result
-
- @classmethod
- async def _run_input_guardrails_with_queue(
- cls,
- agent: Agent[Any],
- guardrails: list[InputGuardrail[TContext]],
- input: str | list[TResponseInputItem],
- context: RunContextWrapper[TContext],
- streamed_result: RunResultStreaming,
- parent_span: Span[Any],
- ):
- queue = streamed_result._input_guardrail_queue
-
- # We'll run the guardrails and push them onto the queue as they complete
- guardrail_tasks = [
- asyncio.create_task(
- RunImpl.run_single_input_guardrail(agent, guardrail, input, context)
- )
- for guardrail in guardrails
- ]
- guardrail_results = []
- try:
- for done in asyncio.as_completed(guardrail_tasks):
- result = await done
- if result.output.tripwire_triggered:
- _utils.attach_error_to_span(
- parent_span,
- SpanError(
- message="Guardrail tripwire triggered",
- data={
- "guardrail": result.guardrail.get_name(),
- "type": "input_guardrail",
- },
- ),
- )
- queue.put_nowait(result)
- guardrail_results.append(result)
- except Exception:
- for t in guardrail_tasks:
- t.cancel()
- raise
-
- streamed_result.input_guardrail_results = guardrail_results
-
- @classmethod
- async def _run_streamed_impl(
- cls,
- starting_input: str | list[TResponseInputItem],
- streamed_result: RunResultStreaming,
- starting_agent: Agent[TContext],
- max_turns: int,
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- ):
- current_span: Span[AgentSpanData] | None = None
- current_agent = starting_agent
- current_turn = 0
- should_run_agent_start_hooks = True
-
- streamed_result._event_queue.put_nowait(AgentUpdatedStreamEvent(new_agent=current_agent))
-
- try:
- while True:
- if streamed_result.is_complete:
- break
-
- # Start an agent span if we don't have one. This span is ended if the current
- # agent changes, or if the agent loop ends.
- if current_span is None:
- handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
- tool_names = [t.name for t in current_agent.tools]
- if output_schema := cls._get_output_schema(current_agent):
- output_type_name = output_schema.output_type_name()
- else:
- output_type_name = "str"
-
- current_span = agent_span(
- name=current_agent.name,
- handoffs=handoff_names,
- tools=tool_names,
- output_type=output_type_name,
- )
- current_span.start(mark_as_current=True)
-
- current_turn += 1
- streamed_result.current_turn = current_turn
-
- if current_turn > max_turns:
- _utils.attach_error_to_span(
- current_span,
- SpanError(
- message="Max turns exceeded",
- data={"max_turns": max_turns},
- ),
- )
- streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
- break
-
- if current_turn == 1:
- # Run the input guardrails in the background and put the results on the queue
- streamed_result._input_guardrails_task = asyncio.create_task(
- cls._run_input_guardrails_with_queue(
- starting_agent,
- starting_agent.input_guardrails + (run_config.input_guardrails or []),
- copy.deepcopy(ItemHelpers.input_to_new_input_list(starting_input)),
- context_wrapper,
- streamed_result,
- current_span,
- )
- )
- try:
- turn_result = await cls._run_single_turn_streamed(
- streamed_result,
- current_agent,
- hooks,
- context_wrapper,
- run_config,
- should_run_agent_start_hooks,
- )
- should_run_agent_start_hooks = False
-
- streamed_result.raw_responses = streamed_result.raw_responses + [
- turn_result.model_response
- ]
- streamed_result.input = turn_result.original_input
- streamed_result.new_items = turn_result.generated_items
-
- if isinstance(turn_result.next_step, NextStepHandoff):
- current_agent = turn_result.next_step.new_agent
- current_span.finish(reset_current=True)
- current_span = None
- should_run_agent_start_hooks = True
- streamed_result._event_queue.put_nowait(
- AgentUpdatedStreamEvent(new_agent=current_agent)
- )
- elif isinstance(turn_result.next_step, NextStepFinalOutput):
- streamed_result._output_guardrails_task = asyncio.create_task(
- cls._run_output_guardrails(
- current_agent.output_guardrails
- + (run_config.output_guardrails or []),
- current_agent,
- turn_result.next_step.output,
- context_wrapper,
- )
- )
-
- try:
- output_guardrail_results = await streamed_result._output_guardrails_task
- except Exception:
- # Exceptions will be checked in the stream_events loop
- output_guardrail_results = []
-
- streamed_result.output_guardrail_results = output_guardrail_results
- streamed_result.final_output = turn_result.next_step.output
- streamed_result.is_complete = True
- streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
- elif isinstance(turn_result.next_step, NextStepRunAgain):
- pass
- except Exception as e:
- if current_span:
- _utils.attach_error_to_span(
- current_span,
- SpanError(
- message="Error in agent run",
- data={"error": str(e)},
- ),
- )
- streamed_result.is_complete = True
- streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
- raise
-
- streamed_result.is_complete = True
- finally:
- if current_span:
- current_span.finish(reset_current=True)
-
- @classmethod
- async def _run_single_turn_streamed(
- cls,
- streamed_result: RunResultStreaming,
- agent: Agent[TContext],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- should_run_agent_start_hooks: bool,
- ) -> SingleStepResult:
- if should_run_agent_start_hooks:
- await asyncio.gather(
- hooks.on_agent_start(context_wrapper, agent),
- (
- agent.hooks.on_start(context_wrapper, agent)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- )
-
- output_schema = cls._get_output_schema(agent)
-
- streamed_result.current_agent = agent
- streamed_result._current_agent_output_schema = output_schema
-
- system_prompt = await agent.get_system_prompt(context_wrapper)
-
- handoffs = cls._get_handoffs(agent)
-
- model = cls._get_model(agent, run_config)
- model_settings = agent.model_settings.resolve(run_config.model_settings)
- final_response: ModelResponse | None = None
-
- input = ItemHelpers.input_to_new_input_list(streamed_result.input)
- input.extend([item.to_input_item() for item in streamed_result.new_items])
-
- # 1. Stream the output events
- async for event in model.stream_response(
- system_prompt,
- input,
- model_settings,
- agent.tools,
- output_schema,
- handoffs,
- get_model_tracing_impl(
- run_config.tracing_disabled, run_config.trace_include_sensitive_data
- ),
- ):
- if isinstance(event, ResponseCompletedEvent):
- usage = (
- Usage(
- requests=1,
- input_tokens=event.response.usage.input_tokens,
- output_tokens=event.response.usage.output_tokens,
- total_tokens=event.response.usage.total_tokens,
- )
- if event.response.usage
- else Usage()
- )
- final_response = ModelResponse(
- output=event.response.output,
- usage=usage,
- referenceable_id=event.response.id,
- )
-
- streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
-
- # 2. At this point, the streaming is complete for this turn of the agent loop.
- if not final_response:
- raise ModelBehaviorError("Model did not produce a final response!")
-
- # 3. Now, we can process the turn as we do in the non-streaming case
- single_step_result = await cls._get_single_step_result_from_response(
- agent=agent,
- original_input=streamed_result.input,
- pre_step_items=streamed_result.new_items,
- new_response=final_response,
- output_schema=output_schema,
- handoffs=handoffs,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- )
-
- RunImpl.stream_step_result_to_queue(single_step_result, streamed_result._event_queue)
- return single_step_result
-
- @classmethod
- async def _run_single_turn(
- cls,
- *,
- agent: Agent[TContext],
- original_input: str | list[TResponseInputItem],
- generated_items: list[RunItem],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- should_run_agent_start_hooks: bool,
- ) -> SingleStepResult:
- # Ensure we run the hooks before anything else
- if should_run_agent_start_hooks:
- await asyncio.gather(
- hooks.on_agent_start(context_wrapper, agent),
- (
- agent.hooks.on_start(context_wrapper, agent)
- if agent.hooks
- else _utils.noop_coroutine()
- ),
- )
-
- system_prompt = await agent.get_system_prompt(context_wrapper)
-
- output_schema = cls._get_output_schema(agent)
- handoffs = cls._get_handoffs(agent)
- input = ItemHelpers.input_to_new_input_list(original_input)
- input.extend([generated_item.to_input_item() for generated_item in generated_items])
-
- new_response = await cls._get_new_response(
- agent,
- system_prompt,
- input,
- output_schema,
- handoffs,
- context_wrapper,
- run_config,
- )
-
- return await cls._get_single_step_result_from_response(
- agent=agent,
- original_input=original_input,
- pre_step_items=generated_items,
- new_response=new_response,
- output_schema=output_schema,
- handoffs=handoffs,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- )
-
- @classmethod
- async def _get_single_step_result_from_response(
- cls,
- *,
- agent: Agent[TContext],
- original_input: str | list[TResponseInputItem],
- pre_step_items: list[RunItem],
- new_response: ModelResponse,
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- hooks: RunHooks[TContext],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- ) -> SingleStepResult:
- processed_response = RunImpl.process_model_response(
- agent=agent,
- response=new_response,
- output_schema=output_schema,
- handoffs=handoffs,
- )
- return await RunImpl.execute_tools_and_side_effects(
- agent=agent,
- original_input=original_input,
- pre_step_items=pre_step_items,
- new_response=new_response,
- processed_response=processed_response,
- output_schema=output_schema,
- hooks=hooks,
- context_wrapper=context_wrapper,
- run_config=run_config,
- )
-
- @classmethod
- async def _run_input_guardrails(
- cls,
- agent: Agent[Any],
- guardrails: list[InputGuardrail[TContext]],
- input: str | list[TResponseInputItem],
- context: RunContextWrapper[TContext],
- ) -> list[InputGuardrailResult]:
- if not guardrails:
- return []
-
- guardrail_tasks = [
- asyncio.create_task(
- RunImpl.run_single_input_guardrail(agent, guardrail, input, context)
- )
- for guardrail in guardrails
- ]
-
- guardrail_results = []
-
- for done in asyncio.as_completed(guardrail_tasks):
- result = await done
- if result.output.tripwire_triggered:
- # Cancel all guardrail tasks if a tripwire is triggered.
- for t in guardrail_tasks:
- t.cancel()
- _utils.attach_error_to_current_span(
- SpanError(
- message="Guardrail tripwire triggered",
- data={"guardrail": result.guardrail.get_name()},
- )
- )
- raise InputGuardrailTripwireTriggered(result)
- else:
- guardrail_results.append(result)
-
- return guardrail_results
-
- @classmethod
- async def _run_output_guardrails(
- cls,
- guardrails: list[OutputGuardrail[TContext]],
- agent: Agent[TContext],
- agent_output: Any,
- context: RunContextWrapper[TContext],
- ) -> list[OutputGuardrailResult]:
- if not guardrails:
- return []
-
- guardrail_tasks = [
- asyncio.create_task(
- RunImpl.run_single_output_guardrail(guardrail, agent, agent_output, context)
- )
- for guardrail in guardrails
- ]
-
- guardrail_results = []
-
- for done in asyncio.as_completed(guardrail_tasks):
- result = await done
- if result.output.tripwire_triggered:
- # Cancel all guardrail tasks if a tripwire is triggered.
- for t in guardrail_tasks:
- t.cancel()
- _utils.attach_error_to_current_span(
- SpanError(
- message="Guardrail tripwire triggered",
- data={"guardrail": result.guardrail.get_name()},
- )
- )
- raise OutputGuardrailTripwireTriggered(result)
- else:
- guardrail_results.append(result)
-
- return guardrail_results
-
- @classmethod
- async def _get_new_response(
- cls,
- agent: Agent[TContext],
- system_prompt: str | None,
- input: list[TResponseInputItem],
- output_schema: AgentOutputSchema | None,
- handoffs: list[Handoff],
- context_wrapper: RunContextWrapper[TContext],
- run_config: RunConfig,
- ) -> ModelResponse:
- model = cls._get_model(agent, run_config)
- model_settings = agent.model_settings.resolve(run_config.model_settings)
- new_response = await model.get_response(
- system_instructions=system_prompt,
- input=input,
- model_settings=model_settings,
- tools=agent.tools,
- output_schema=output_schema,
- handoffs=handoffs,
- tracing=get_model_tracing_impl(
- run_config.tracing_disabled, run_config.trace_include_sensitive_data
- ),
- )
-
- context_wrapper.usage.add(new_response.usage)
-
- return new_response
-
- @classmethod
- def _get_output_schema(cls, agent: Agent[Any]) -> AgentOutputSchema | None:
- if agent.output_type is None or agent.output_type is str:
- return None
-
- return AgentOutputSchema(agent.output_type)
-
- @classmethod
- def _get_handoffs(cls, agent: Agent[Any]) -> list[Handoff]:
- handoffs = []
- for handoff_item in agent.handoffs:
- if isinstance(handoff_item, Handoff):
- handoffs.append(handoff_item)
- elif isinstance(handoff_item, Agent):
- handoffs.append(handoff(handoff_item))
- return handoffs
-
- @classmethod
- def _get_model(cls, agent: Agent[Any], run_config: RunConfig) -> Model:
- if isinstance(run_config.model, Model):
- return run_config.model
- elif isinstance(run_config.model, str):
- return run_config.model_provider.get_model(run_config.model)
- elif isinstance(agent.model, Model):
- return agent.model
-
- return run_config.model_provider.get_model(agent.model)
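
The guardrail runners above share one concurrency idiom: start every check as a task, consume them with `asyncio.as_completed`, and cancel the stragglers the moment one trips. A minimal self-contained sketch of that pattern (the names here are illustrative, not SDK API):

import asyncio

async def check(name: str, delay: float, tripped: bool) -> tuple[str, bool]:
    await asyncio.sleep(delay)
    return name, tripped

async def main() -> None:
    tasks = [
        asyncio.create_task(check("fast-ok", 0.1, False)),
        asyncio.create_task(check("tripwire", 0.2, True)),
        asyncio.create_task(check("slow-ok", 5.0, False)),
    ]
    results = []
    for done in asyncio.as_completed(tasks):
        name, tripped = await done
        if tripped:
            # Mirrors the loops above: cancel everything still in flight as
            # soon as one check trips, instead of waiting ~5s for slow-ok.
            for t in tasks:
                t.cancel()
            raise RuntimeError(f"guardrail {name!r} tripped")
        results.append(name)

asyncio.run(main())  # raises RuntimeError after ~0.2s, with results == ["fast-ok"]
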
diff --git a/tests/src/agents/run_context.py b/tests/src/agents/run_context.py
deleted file mode 100644
index 579a215f..00000000
--- a/tests/src/agents/run_context.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Any, Generic
-
-from typing_extensions import TypeVar
-
-from .usage import Usage
-
-TContext = TypeVar("TContext", default=Any)
-
-
-@dataclass
-class RunContextWrapper(Generic[TContext]):
- """This wraps the context object that you passed to `Runner.run()`. It also contains
- information about the usage of the agent run so far.
-
- NOTE: Contexts are not passed to the LLM. They're a way to pass dependencies and data to code
- you implement, like tool functions, callbacks, hooks, etc.
- """
-
- context: TContext
- """The context object (or None), passed by you to `Runner.run()`"""
-
- usage: Usage = field(default_factory=Usage)
- """The usage of the agent run so far. For streamed responses, the usage will be stale until the
- last chunk of the stream is processed.
- """
diff --git a/tests/src/agents/stream_events.py b/tests/src/agents/stream_events.py
deleted file mode 100644
index bd37d11f..00000000
--- a/tests/src/agents/stream_events.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Any, Literal, Union
-
-from typing_extensions import TypeAlias
-
-from .agent import Agent
-from .items import RunItem, TResponseStreamEvent
-
-
-@dataclass
-class RawResponsesStreamEvent:
- """Streaming event from the LLM. These are 'raw' events, i.e. they are directly passed through
- from the LLM.
- """
-
- data: TResponseStreamEvent
- """The raw responses streaming event from the LLM."""
-
- type: Literal["raw_response_event"] = "raw_response_event"
- """The type of the event."""
-
-
-@dataclass
-class RunItemStreamEvent:
- """Streaming events that wrap a `RunItem`. As the agent processes the LLM response, it will
- generate these events for new messages, tool calls, tool outputs, handoffs, etc.
- """
-
- name: Literal[
- "message_output_created",
- "handoff_requested",
- "handoff_occured",
- "tool_called",
- "tool_output",
- "reasoning_item_created",
- ]
- """The name of the event."""
-
- item: RunItem
- """The item that was created."""
-
- type: Literal["run_item_stream_event"] = "run_item_stream_event"
-
-
-@dataclass
-class AgentUpdatedStreamEvent:
- """Event that notifies that there is a new agent running."""
-
- new_agent: Agent[Any]
- """The new agent."""
-
- type: Literal["agent_updated_stream_event"] = "agent_updated_stream_event"
-
-
-StreamEvent: TypeAlias = Union[RawResponsesStreamEvent, RunItemStreamEvent, AgentUpdatedStreamEvent]
-"""A streaming event from an agent."""
diff --git a/tests/src/agents/strict_schema.py b/tests/src/agents/strict_schema.py
deleted file mode 100644
index 910ad85f..00000000
--- a/tests/src/agents/strict_schema.py
+++ /dev/null
@@ -1,167 +0,0 @@
-from __future__ import annotations
-
-from typing import Any
-
-from openai import NOT_GIVEN
-from typing_extensions import TypeGuard
-
-from .exceptions import UserError
-
-_EMPTY_SCHEMA = {
- "additionalProperties": False,
- "type": "object",
- "properties": {},
- "required": [],
-}
-
-
-def ensure_strict_json_schema(
- schema: dict[str, Any],
-) -> dict[str, Any]:
- """Mutates the given JSON schema to ensure it conforms to the `strict` standard
- that the OpenAI API expects.
- """
- if schema == {}:
- return _EMPTY_SCHEMA
- return _ensure_strict_json_schema(schema, path=(), root=schema)
-
-
-# Adapted from https://github.com/openai/openai-python/blob/main/src/openai/lib/_pydantic.py
-def _ensure_strict_json_schema(
- json_schema: object,
- *,
- path: tuple[str, ...],
- root: dict[str, object],
-) -> dict[str, Any]:
- if not is_dict(json_schema):
- raise TypeError(f"Expected {json_schema} to be a dictionary; path={path}")
-
- defs = json_schema.get("$defs")
- if is_dict(defs):
- for def_name, def_schema in defs.items():
- _ensure_strict_json_schema(def_schema, path=(*path, "$defs", def_name), root=root)
-
- definitions = json_schema.get("definitions")
- if is_dict(definitions):
- for definition_name, definition_schema in definitions.items():
- _ensure_strict_json_schema(
- definition_schema, path=(*path, "definitions", definition_name), root=root
- )
-
- typ = json_schema.get("type")
- if typ == "object" and "additionalProperties" not in json_schema:
- json_schema["additionalProperties"] = False
- elif (
- typ == "object"
- and "additionalProperties" in json_schema
- and json_schema["additionalProperties"] is True
- ):
- raise UserError(
- "additionalProperties should not be set for object types. This could be because "
- "you're using an older version of Pydantic, or because you configured additional "
- "properties to be allowed. If you really need this, update the function or output tool "
- "to not use a strict schema."
- )
-
- # object types
- # { 'type': 'object', 'properties': { 'a': {...} } }
- properties = json_schema.get("properties")
- if is_dict(properties):
- json_schema["required"] = list(properties.keys())
- json_schema["properties"] = {
- key: _ensure_strict_json_schema(prop_schema, path=(*path, "properties", key), root=root)
- for key, prop_schema in properties.items()
- }
-
- # arrays
- # { 'type': 'array', 'items': {...} }
- items = json_schema.get("items")
- if is_dict(items):
- json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root)
-
- # unions
- any_of = json_schema.get("anyOf")
- if is_list(any_of):
- json_schema["anyOf"] = [
- _ensure_strict_json_schema(variant, path=(*path, "anyOf", str(i)), root=root)
- for i, variant in enumerate(any_of)
- ]
-
- # intersections
- all_of = json_schema.get("allOf")
- if is_list(all_of):
- if len(all_of) == 1:
- json_schema.update(
- _ensure_strict_json_schema(all_of[0], path=(*path, "allOf", "0"), root=root)
- )
- json_schema.pop("allOf")
- else:
- json_schema["allOf"] = [
- _ensure_strict_json_schema(entry, path=(*path, "allOf", str(i)), root=root)
- for i, entry in enumerate(all_of)
- ]
-
- # strip `None` defaults as there's no meaningful distinction here
- # the schema will still be `nullable` and the model will default
- # to using `None` anyway
- if json_schema.get("default", NOT_GIVEN) is None:
- json_schema.pop("default")
-
- # we can't use `$ref`s if there are also other properties defined, e.g.
- # `{"$ref": "...", "description": "my description"}`
- #
- # so we unravel the ref
- # `{"type": "string", "description": "my description"}`
- ref = json_schema.get("$ref")
- if ref and has_more_than_n_keys(json_schema, 1):
- assert isinstance(ref, str), f"Received non-string $ref - {ref}"
-
- resolved = resolve_ref(root=root, ref=ref)
- if not is_dict(resolved):
- raise ValueError(
- f"Expected `$ref: {ref}` to resolved to a dictionary but got {resolved}"
- )
-
- # properties from the json schema take priority over the ones on the `$ref`
- json_schema.update({**resolved, **json_schema})
- json_schema.pop("$ref")
- # Since the schema expanded from `$ref` might not have `additionalProperties: false` applied
- # we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid
- return _ensure_strict_json_schema(json_schema, path=path, root=root)
-
- return json_schema
-
-
-def resolve_ref(*, root: dict[str, object], ref: str) -> object:
- if not ref.startswith("#/"):
- raise ValueError(f"Unexpected $ref format {ref!r}; Does not start with #/")
-
- path = ref[2:].split("/")
- resolved = root
- for key in path:
- value = resolved[key]
- assert is_dict(value), (
- f"encountered non-dictionary entry while resolving {ref} - {resolved}"
- )
- resolved = value
-
- return resolved
-
-
-def is_dict(obj: object) -> TypeGuard[dict[str, object]]:
- # just pretend that we know there are only `str` keys
- # as that check is not worth the performance cost
- return isinstance(obj, dict)
-
-
-def is_list(obj: object) -> TypeGuard[list[object]]:
- return isinstance(obj, list)
-
-
-def has_more_than_n_keys(obj: dict[str, object], n: int) -> bool:
- i = 0
- for _ in obj.keys():
- i += 1
- if i > n:
- return True
- return False
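
A worked example of the transformation above; the import path assumes the canonical module under `src/agents` (the copy deleted here was a duplicate):

from agents.strict_schema import ensure_strict_json_schema

schema = {
    "type": "object",
    "properties": {
        "city": {"type": "string"},
        "days": {"type": "integer", "default": None},
    },
}
strict = ensure_strict_json_schema(schema)
assert strict["additionalProperties"] is False          # pinned for object types
assert strict["required"] == ["city", "days"]           # filled from `properties`
assert "default" not in strict["properties"]["days"]    # None defaults are stripped
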
diff --git a/tests/src/agents/tool.py b/tests/src/agents/tool.py
deleted file mode 100644
index 75872680..00000000
--- a/tests/src/agents/tool.py
+++ /dev/null
@@ -1,286 +0,0 @@
-from __future__ import annotations
-
-import inspect
-import json
-from collections.abc import Awaitable
-from dataclasses import dataclass
-from typing import Any, Callable, Literal, Union, overload
-
-from openai.types.responses.file_search_tool_param import Filters, RankingOptions
-from openai.types.responses.web_search_tool_param import UserLocation
-from pydantic import ValidationError
-from typing_extensions import Concatenate, ParamSpec
-
-from . import _debug, _utils
-from ._utils import MaybeAwaitable
-from .computer import AsyncComputer, Computer
-from .exceptions import ModelBehaviorError
-from .function_schema import DocstringStyle, function_schema
-from .logger import logger
-from .run_context import RunContextWrapper
-from .tracing import SpanError
-
-ToolParams = ParamSpec("ToolParams")
-
-ToolFunctionWithoutContext = Callable[ToolParams, Any]
-ToolFunctionWithContext = Callable[Concatenate[RunContextWrapper[Any], ToolParams], Any]
-
-ToolFunction = Union[ToolFunctionWithoutContext[ToolParams], ToolFunctionWithContext[ToolParams]]
-
-
-@dataclass
-class FunctionTool:
- """A tool that wraps a function. In most cases, you should use the `function_tool` helpers to
- create a FunctionTool, as they let you easily wrap a Python function.
- """
-
- name: str
- """The name of the tool, as shown to the LLM. Generally the name of the function."""
-
- description: str
- """A description of the tool, as shown to the LLM."""
-
- params_json_schema: dict[str, Any]
- """The JSON schema for the tool's parameters."""
-
- on_invoke_tool: Callable[[RunContextWrapper[Any], str], Awaitable[str]]
- """A function that invokes the tool with the given context and parameters. The params passed
- are:
- 1. The tool run context.
- 2. The arguments from the LLM, as a JSON string.
-
- You must return a string representation of the tool output. In case of errors, you can either
- raise an Exception (which will cause the run to fail) or return a string error message (which
- will be sent back to the LLM).
- """
-
- strict_json_schema: bool = True
- """Whether the JSON schema is in strict mode. We **strongly** recommend setting this to True,
- as it increases the likelihood of correct JSON input."""
-
-
-@dataclass
-class FileSearchTool:
- """A hosted tool that lets the LLM search through a vector store. Currently only supported with
- OpenAI models, using the Responses API.
- """
-
- vector_store_ids: list[str]
- """The IDs of the vector stores to search."""
-
- max_num_results: int | None = None
- """The maximum number of results to return."""
-
- include_search_results: bool = False
- """Whether to include the search results in the output produced by the LLM."""
-
- ranking_options: RankingOptions | None = None
- """Ranking options for search."""
-
- filters: Filters | None = None
- """A filter to apply based on file attributes."""
-
- @property
- def name(self):
- return "file_search"
-
-
-@dataclass
-class WebSearchTool:
- """A hosted tool that lets the LLM search the web. Currently only supported with OpenAI models,
- using the Responses API.
- """
-
- user_location: UserLocation | None = None
- """Optional location for the search. Lets you customize results to be relevant to a location."""
-
- search_context_size: Literal["low", "medium", "high"] = "medium"
- """The amount of context to use for the search."""
-
- @property
- def name(self):
- return "web_search_preview"
-
-
-@dataclass
-class ComputerTool:
- """A hosted tool that lets the LLM control a computer."""
-
- computer: Computer | AsyncComputer
- """The computer implementation, which describes the environment and dimensions of the computer,
- as well as implements the computer actions like click, screenshot, etc.
- """
-
- @property
- def name(self):
- return "computer_use_preview"
-
-
-Tool = Union[FunctionTool, FileSearchTool, WebSearchTool, ComputerTool]
-"""A tool that can be used in an agent."""
-
-
-def default_tool_error_function(ctx: RunContextWrapper[Any], error: Exception) -> str:
- """The default tool error function, which just returns a generic error message."""
- return f"An error occurred while running the tool. Please try again. Error: {str(error)}"
-
-
-ToolErrorFunction = Callable[[RunContextWrapper[Any], Exception], MaybeAwaitable[str]]
-
-
-@overload
-def function_tool(
- func: ToolFunction[...],
- *,
- name_override: str | None = None,
- description_override: str | None = None,
- docstring_style: DocstringStyle | None = None,
- use_docstring_info: bool = True,
- failure_error_function: ToolErrorFunction | None = None,
-) -> FunctionTool:
- """Overload for usage as @function_tool (no parentheses)."""
- ...
-
-
-@overload
-def function_tool(
- *,
- name_override: str | None = None,
- description_override: str | None = None,
- docstring_style: DocstringStyle | None = None,
- use_docstring_info: bool = True,
- failure_error_function: ToolErrorFunction | None = None,
-) -> Callable[[ToolFunction[...]], FunctionTool]:
- """Overload for usage as @function_tool(...)."""
- ...
-
-
-def function_tool(
- func: ToolFunction[...] | None = None,
- *,
- name_override: str | None = None,
- description_override: str | None = None,
- docstring_style: DocstringStyle | None = None,
- use_docstring_info: bool = True,
- failure_error_function: ToolErrorFunction | None = default_tool_error_function,
-) -> FunctionTool | Callable[[ToolFunction[...]], FunctionTool]:
- """
- Decorator to create a FunctionTool from a function. By default, we will:
- 1. Parse the function signature to create a JSON schema for the tool's parameters.
- 2. Use the function's docstring to populate the tool's description.
- 3. Use the function's docstring to populate argument descriptions.
- The docstring style is detected automatically, but you can override it.
-
- If the function takes a `RunContextWrapper` as the first argument, it *must* match the
- context type of the agent that uses the tool.
-
- Args:
- func: The function to wrap.
- name_override: If provided, use this name for the tool instead of the function's name.
- description_override: If provided, use this description for the tool instead of the
- function's docstring.
- docstring_style: If provided, use this style for the tool's docstring. If not provided,
- we will attempt to auto-detect the style.
- use_docstring_info: If True, use the function's docstring to populate the tool's
- description and argument descriptions.
- failure_error_function: If provided, use this function to generate an error message when
- the tool call fails. The error message is sent to the LLM. If you pass None, then no
- error message will be sent and instead an Exception will be raised.
- """
-
- def _create_function_tool(the_func: ToolFunction[...]) -> FunctionTool:
- schema = function_schema(
- func=the_func,
- name_override=name_override,
- description_override=description_override,
- docstring_style=docstring_style,
- use_docstring_info=use_docstring_info,
- )
-
- async def _on_invoke_tool_impl(ctx: RunContextWrapper[Any], input: str) -> str:
- try:
- json_data: dict[str, Any] = json.loads(input) if input else {}
- except Exception as e:
- if _debug.DONT_LOG_TOOL_DATA:
- logger.debug(f"Invalid JSON input for tool {schema.name}")
- else:
- logger.debug(f"Invalid JSON input for tool {schema.name}: {input}")
- raise ModelBehaviorError(
- f"Invalid JSON input for tool {schema.name}: {input}"
- ) from e
-
- if _debug.DONT_LOG_TOOL_DATA:
- logger.debug(f"Invoking tool {schema.name}")
- else:
- logger.debug(f"Invoking tool {schema.name} with input {input}")
-
- try:
- parsed = (
- schema.params_pydantic_model(**json_data)
- if json_data
- else schema.params_pydantic_model()
- )
- except ValidationError as e:
- raise ModelBehaviorError(f"Invalid JSON input for tool {schema.name}: {e}") from e
-
- args, kwargs_dict = schema.to_call_args(parsed)
-
- if not _debug.DONT_LOG_TOOL_DATA:
- logger.debug(f"Tool call args: {args}, kwargs: {kwargs_dict}")
-
- if inspect.iscoroutinefunction(the_func):
- if schema.takes_context:
- result = await the_func(ctx, *args, **kwargs_dict)
- else:
- result = await the_func(*args, **kwargs_dict)
- else:
- if schema.takes_context:
- result = the_func(ctx, *args, **kwargs_dict)
- else:
- result = the_func(*args, **kwargs_dict)
-
- if _debug.DONT_LOG_TOOL_DATA:
- logger.debug(f"Tool {schema.name} completed.")
- else:
- logger.debug(f"Tool {schema.name} returned {result}")
-
- return str(result)
-
- async def _on_invoke_tool(ctx: RunContextWrapper[Any], input: str) -> str:
- try:
- return await _on_invoke_tool_impl(ctx, input)
- except Exception as e:
- if failure_error_function is None:
- raise
-
- # Attach the error to the current span first, so it is recorded whether
- # failure_error_function returns a plain or awaitable result.
- _utils.attach_error_to_current_span(
- SpanError(
- message="Error running tool (non-fatal)",
- data={
- "tool_name": schema.name,
- "error": str(e),
- },
- )
- )
-
- result = failure_error_function(ctx, e)
- if inspect.isawaitable(result):
- return await result
-
- return result
-
- return FunctionTool(
- name=schema.name,
- description=schema.description or "",
- params_json_schema=schema.params_json_schema,
- on_invoke_tool=_on_invoke_tool,
- )
-
- # If func is actually a callable, we were used as @function_tool with no parentheses
- if callable(func):
- return _create_function_tool(func)
-
- # Otherwise, we were used as @function_tool(...), so return a decorator
- def decorator(real_func: ToolFunction[...]) -> FunctionTool:
- return _create_function_tool(real_func)
-
- return decorator
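
Both decorator overloads above in use, assuming the published `agents` package exports `function_tool` and that the generated parameter schema is strict by default:

from agents import function_tool

@function_tool
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: The first addend.
        b: The second addend.
    """
    return a + b

@function_tool(name_override="fetch_weather", failure_error_function=None)
async def weather(city: str) -> str:
    """Look up the weather; with failure_error_function=None, errors raise instead of going to the LLM."""
    raise NotImplementedError  # placeholder body

print(add.name, add.params_json_schema["required"])  # -> add ['a', 'b']
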
diff --git a/tests/src/agents/tracing/__init__.py b/tests/src/agents/tracing/__init__.py
deleted file mode 100644
index 8e802018..00000000
--- a/tests/src/agents/tracing/__init__.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import atexit
-
-from .create import (
- agent_span,
- custom_span,
- function_span,
- generation_span,
- get_current_span,
- get_current_trace,
- guardrail_span,
- handoff_span,
- response_span,
- trace,
-)
-from .processor_interface import TracingProcessor
-from .processors import default_exporter, default_processor
-from .setup import GLOBAL_TRACE_PROVIDER
-from .span_data import (
- AgentSpanData,
- CustomSpanData,
- FunctionSpanData,
- GenerationSpanData,
- GuardrailSpanData,
- HandoffSpanData,
- ResponseSpanData,
- SpanData,
-)
-from .spans import Span, SpanError
-from .traces import Trace
-from .util import gen_span_id, gen_trace_id
-
-__all__ = [
- "add_trace_processor",
- "agent_span",
- "custom_span",
- "function_span",
- "generation_span",
- "get_current_span",
- "get_current_trace",
- "guardrail_span",
- "handoff_span",
- "response_span",
- "set_trace_processors",
- "set_tracing_disabled",
- "trace",
- "Trace",
- "SpanError",
- "Span",
- "SpanData",
- "AgentSpanData",
- "CustomSpanData",
- "FunctionSpanData",
- "GenerationSpanData",
- "GuardrailSpanData",
- "HandoffSpanData",
- "ResponseSpanData",
- "TracingProcessor",
- "gen_trace_id",
- "gen_span_id",
-]
-
-
-def add_trace_processor(span_processor: TracingProcessor) -> None:
- """
- Adds a new trace processor. This processor will receive all traces/spans.
- """
- GLOBAL_TRACE_PROVIDER.register_processor(span_processor)
-
-
-def set_trace_processors(processors: list[TracingProcessor]) -> None:
- """
- Set the list of trace processors. This will replace the current list of processors.
- """
- GLOBAL_TRACE_PROVIDER.set_processors(processors)
-
-
-def set_tracing_disabled(disabled: bool) -> None:
- """
- Set whether tracing is globally disabled.
- """
- GLOBAL_TRACE_PROVIDER.set_disabled(disabled)
-
-
-def set_tracing_export_api_key(api_key: str) -> None:
- """
- Set the OpenAI API key for the backend exporter.
- """
- default_exporter().set_api_key(api_key)
-
-
-# Add the default processor, which exports traces and spans to the backend in batches. You can
-# change the default behavior by either:
-# 1. calling add_trace_processor(), which adds additional processors, or
-# 2. calling set_trace_processors(), which replaces the default processor.
-add_trace_processor(default_processor())
-
-atexit.register(GLOBAL_TRACE_PROVIDER.shutdown)
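
The closing comment names two extension points; a sketch of each, with imports assuming the canonical `agents.tracing` package (`BatchTraceProcessor` and `ConsoleSpanExporter` are defined in `processors.py` below):

from agents.tracing import add_trace_processor, set_trace_processors, set_tracing_disabled
from agents.tracing.processors import BatchTraceProcessor, ConsoleSpanExporter

# 1. Keep the default backend exporter and *also* mirror spans to stdout:
add_trace_processor(BatchTraceProcessor(ConsoleSpanExporter()))

# 2. Or replace the default processor entirely:
set_trace_processors([BatchTraceProcessor(ConsoleSpanExporter())])

# Global kill switch:
set_tracing_disabled(True)
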
diff --git a/tests/src/agents/tracing/create.py b/tests/src/agents/tracing/create.py
deleted file mode 100644
index 8d7fc493..00000000
--- a/tests/src/agents/tracing/create.py
+++ /dev/null
@@ -1,306 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Mapping, Sequence
-from typing import TYPE_CHECKING, Any
-
-from .logger import logger
-from .setup import GLOBAL_TRACE_PROVIDER
-from .span_data import (
- AgentSpanData,
- CustomSpanData,
- FunctionSpanData,
- GenerationSpanData,
- GuardrailSpanData,
- HandoffSpanData,
- ResponseSpanData,
-)
-from .spans import Span
-from .traces import Trace
-
-if TYPE_CHECKING:
- from openai.types.responses import Response
-
-
-def trace(
- workflow_name: str,
- trace_id: str | None = None,
- group_id: str | None = None,
- metadata: dict[str, Any] | None = None,
- disabled: bool = False,
-) -> Trace:
- """
- Create a new trace. The trace will not be started automatically; you should either use
- it as a context manager (`with trace(...):`) or call `trace.start()` + `trace.finish()`
- manually.
-
- In addition to the workflow name and optional grouping identifier, you can provide
- an arbitrary metadata dictionary to attach additional user-defined information to
- the trace.
-
- Args:
- workflow_name: The name of the logical app or workflow. For example, you might provide
- "code_bot" for a coding agent, or "customer_support_agent" for a customer support agent.
- trace_id: The ID of the trace. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_trace_id()` to generate a trace ID, to guarantee that IDs are
- correctly formatted.
- group_id: Optional grouping identifier to link multiple traces from the same conversation
- or process. For instance, you might use a chat thread ID.
- metadata: Optional dictionary of additional metadata to attach to the trace.
- disabled: If True, we will return a Trace but the Trace will not be recorded.
-
- Returns:
- The newly created trace object.
- """
- current_trace = GLOBAL_TRACE_PROVIDER.get_current_trace()
- if current_trace:
- logger.warning(
- "Trace already exists. Creating a new trace, but this is probably a mistake."
- )
-
- return GLOBAL_TRACE_PROVIDER.create_trace(
- name=workflow_name,
- trace_id=trace_id,
- group_id=group_id,
- metadata=metadata,
- disabled=disabled,
- )
-
-
-def get_current_trace() -> Trace | None:
- """Returns the currently active trace, if present."""
- return GLOBAL_TRACE_PROVIDER.get_current_trace()
-
-
-def get_current_span() -> Span[Any] | None:
- """Returns the currently active span, if present."""
- return GLOBAL_TRACE_PROVIDER.get_current_span()
-
-
-def agent_span(
- name: str,
- handoffs: list[str] | None = None,
- tools: list[str] | None = None,
- output_type: str | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[AgentSpanData]:
- """Create a new agent span. The span will not be started automatically, you should either do
- `with agent_span() ...` or call `span.start()` + `span.finish()` manually.
-
- Args:
- name: The name of the agent.
- handoffs: Optional list of agent names to which this agent could hand off control.
- tools: Optional list of tool names available to this agent.
- output_type: Optional name of the output type produced by the agent.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
-
- Returns:
- The newly created agent span.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=AgentSpanData(name=name, handoffs=handoffs, tools=tools, output_type=output_type),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def function_span(
- name: str,
- input: str | None = None,
- output: str | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[FunctionSpanData]:
- """Create a new function span. The span will not be started automatically, you should either do
- `with function_span() ...` or call `span.start()` + `span.finish()` manually.
-
- Args:
- name: The name of the function.
- input: The input to the function.
- output: The output of the function.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
-
- Returns:
- The newly created function span.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=FunctionSpanData(name=name, input=input, output=output),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def generation_span(
- input: Sequence[Mapping[str, Any]] | None = None,
- output: Sequence[Mapping[str, Any]] | None = None,
- model: str | None = None,
- model_config: Mapping[str, Any] | None = None,
- usage: dict[str, Any] | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[GenerationSpanData]:
- """Create a new generation span. The span will not be started automatically, you should either
- do `with generation_span() ...` or call `span.start()` + `span.finish()` manually.
-
- This span captures the details of a model generation, including the
- input message sequence, any generated outputs, the model name and
- configuration, and usage data. If you only need to capture a model
- response identifier, use `response_span()` instead.
-
- Args:
- input: The sequence of input messages sent to the model.
- output: The sequence of output messages received from the model.
- model: The model identifier used for the generation.
- model_config: The model configuration (hyperparameters) used.
- usage: A dictionary of usage information (input tokens, output tokens, etc.).
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
-
- Returns:
- The newly created generation span.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=GenerationSpanData(
- input=input, output=output, model=model, model_config=model_config, usage=usage
- ),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def response_span(
- response: Response | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[ResponseSpanData]:
- """Create a new response span. The span will not be started automatically, you should either do
- `with response_span() ...` or call `span.start()` + `span.finish()` manually.
-
- Args:
- response: The OpenAI Response object.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=ResponseSpanData(response=response),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def handoff_span(
- from_agent: str | None = None,
- to_agent: str | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[HandoffSpanData]:
- """Create a new handoff span. The span will not be started automatically, you should either do
- `with handoff_span() ...` or call `span.start()` + `span.finish()` manually.
-
- Args:
- from_agent: The name of the agent that is handing off.
- to_agent: The name of the agent that is receiving the handoff.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
-
- Returns:
- The newly created handoff span.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=HandoffSpanData(from_agent=from_agent, to_agent=to_agent),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def custom_span(
- name: str,
- data: dict[str, Any] | None = None,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[CustomSpanData]:
- """Create a new custom span, to which you can add your own metadata. The span will not be
- started automatically, you should either do `with custom_span() ...` or call
- `span.start()` + `span.finish()` manually.
-
- Args:
- name: The name of the custom span.
- data: Arbitrary structured data to associate with the span.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
-
- Returns:
- The newly created custom span.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=CustomSpanData(name=name, data=data or {}),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
-
-
-def guardrail_span(
- name: str,
- triggered: bool = False,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
-) -> Span[GuardrailSpanData]:
- """Create a new guardrail span. The span will not be started automatically, you should either
- do `with guardrail_span() ...` or call `span.start()` + `span.finish()` manually.
-
- Args:
- name: The name of the guardrail.
- triggered: Whether the guardrail was triggered.
- span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
- recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
- correctly formatted.
- parent: The parent span or trace. If not provided, we will automatically use the current
- trace/span as the parent.
- disabled: If True, we will return a Span but the Span will not be recorded.
- """
- return GLOBAL_TRACE_PROVIDER.create_span(
- span_data=GuardrailSpanData(name=name, triggered=triggered),
- span_id=span_id,
- parent=parent,
- disabled=disabled,
- )
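
Every factory above documents the same context-manager usage; end to end it looks like this (imports assume the canonical `agents.tracing` exports):

from agents.tracing import trace, custom_span, guardrail_span

with trace("customer_support_agent", group_id="thread-42"):
    with custom_span("lookup_account", data={"account_id": "a-123"}):
        pass  # parent defaults to the current trace/span, so spans nest
    with guardrail_span("profanity_filter", triggered=False):
        pass
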
diff --git a/tests/src/agents/tracing/logger.py b/tests/src/agents/tracing/logger.py
deleted file mode 100644
index 661d09b5..00000000
--- a/tests/src/agents/tracing/logger.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import logging
-
-logger = logging.getLogger("openai.agents.tracing")
diff --git a/tests/src/agents/tracing/processor_interface.py b/tests/src/agents/tracing/processor_interface.py
deleted file mode 100644
index 4dcd897c..00000000
--- a/tests/src/agents/tracing/processor_interface.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import abc
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
- from .spans import Span
- from .traces import Trace
-
-
-class TracingProcessor(abc.ABC):
- """Interface for processing spans."""
-
- @abc.abstractmethod
- def on_trace_start(self, trace: "Trace") -> None:
- """Called when a trace is started.
-
- Args:
- trace: The trace that started.
- """
- pass
-
- @abc.abstractmethod
- def on_trace_end(self, trace: "Trace") -> None:
- """Called when a trace is finished.
-
- Args:
- trace: The trace that finished.
- """
- pass
-
- @abc.abstractmethod
- def on_span_start(self, span: "Span[Any]") -> None:
- """Called when a span is started.
-
- Args:
- span: The span that started.
- """
- pass
-
- @abc.abstractmethod
- def on_span_end(self, span: "Span[Any]") -> None:
- """Called when a span is finished. Should not block or raise exceptions.
-
- Args:
- span: The span that finished.
- """
- pass
-
- @abc.abstractmethod
- def shutdown(self) -> None:
- """Called when the application stops."""
- pass
-
- @abc.abstractmethod
- def force_flush(self) -> None:
- """Forces an immediate flush of all queued spans/traces."""
- pass
-
-
-class TracingExporter(abc.ABC):
- """Exports traces and spans. For example, could log them or send them to a backend."""
-
- @abc.abstractmethod
- def export(self, items: list["Trace | Span[Any]"]) -> None:
- """Exports a list of traces and spans.
-
- Args:
- items: The items to export.
- """
- pass
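
A minimal concrete `TracingProcessor`, to show how little the interface demands; `SpanCounter` is made up, and the imports assume the canonical `agents.tracing` exports:

import collections
from typing import Any

from agents.tracing import Span, Trace, TracingProcessor, add_trace_processor

class SpanCounter(TracingProcessor):
    def __init__(self) -> None:
        self.counts: dict[str, int] = collections.defaultdict(int)

    def on_trace_start(self, trace: Trace) -> None:
        self.counts[trace.trace_id] = 0

    def on_trace_end(self, trace: Trace) -> None:
        print(f"{trace.name}: {self.counts.pop(trace.trace_id, 0)} spans")

    def on_span_start(self, span: Span[Any]) -> None:
        pass

    def on_span_end(self, span: Span[Any]) -> None:
        self.counts[span.trace_id] += 1  # must not block or raise

    def shutdown(self) -> None:
        pass

    def force_flush(self) -> None:
        pass

add_trace_processor(SpanCounter())
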
diff --git a/tests/src/agents/tracing/processors.py b/tests/src/agents/tracing/processors.py
deleted file mode 100644
index 282bc23c..00000000
--- a/tests/src/agents/tracing/processors.py
+++ /dev/null
@@ -1,261 +0,0 @@
-from __future__ import annotations
-
-import os
-import queue
-import random
-import threading
-import time
-from typing import Any
-
-import httpx
-
-from .logger import logger
-from .processor_interface import TracingExporter, TracingProcessor
-from .spans import Span
-from .traces import Trace
-
-
-class ConsoleSpanExporter(TracingExporter):
- """Prints the traces and spans to the console."""
-
- def export(self, items: list[Trace | Span[Any]]) -> None:
- for item in items:
- if isinstance(item, Trace):
- print(f"[Exporter] Export trace_id={item.trace_id}, name={item.name}, ")
- else:
- print(f"[Exporter] Export span: {item.export()}")
-
-
-class BackendSpanExporter(TracingExporter):
- def __init__(
- self,
- api_key: str | None = None,
- organization: str | None = None,
- project: str | None = None,
- endpoint: str = "https://api.openai.com/v1/traces/ingest",
- max_retries: int = 3,
- base_delay: float = 1.0,
- max_delay: float = 30.0,
- ):
- """
- Args:
- api_key: The API key for the "Authorization" header. Defaults to
- `os.environ["OPENAI_TRACE_API_KEY"]` if not provided.
- organization: The OpenAI organization to use. Defaults to
- `os.environ["OPENAI_ORG_ID"]` if not provided.
- project: The OpenAI project to use. Defaults to
- `os.environ["OPENAI_PROJECT_ID"]` if not provided.
- endpoint: The HTTP endpoint to which traces/spans are posted.
- max_retries: Maximum number of retries upon failures.
- base_delay: Base delay (in seconds) for the first backoff.
- max_delay: Maximum delay (in seconds) for backoff growth.
- """
- self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
- self.organization = organization or os.environ.get("OPENAI_ORG_ID")
- self.project = project or os.environ.get("OPENAI_PROJECT_ID")
- self.endpoint = endpoint
- self.max_retries = max_retries
- self.base_delay = base_delay
- self.max_delay = max_delay
-
- # Keep a client open for connection pooling across multiple export calls
- self._client = httpx.Client(timeout=httpx.Timeout(timeout=60, connect=5.0))
-
- def set_api_key(self, api_key: str):
- """Set the OpenAI API key for the exporter.
-
- Args:
- api_key: The OpenAI API key to use. This is the same key used by the OpenAI Python
- client.
- """
- self.api_key = api_key
-
- def export(self, items: list[Trace | Span[Any]]) -> None:
- if not items:
- return
-
- if not self.api_key:
- logger.warning("OPENAI_API_KEY is not set, skipping trace export")
- return
-
- data = [item.export() for item in items if item.export()]
- payload = {"data": data}
-
- headers = {
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json",
- "OpenAI-Beta": "traces=v1",
- }
-
- # Exponential backoff loop
- attempt = 0
- delay = self.base_delay
- while True:
- attempt += 1
- try:
- response = self._client.post(url=self.endpoint, headers=headers, json=payload)
-
- # If the response is successful, break out of the loop
- if response.status_code < 300:
- logger.debug(f"Exported {len(traces)} traces, {len(spans)} spans")
- return
-
- # If the response is a client error (4xx), we won't retry
- if 400 <= response.status_code < 500:
- logger.error(f"Tracing client error {response.status_code}: {response.text}")
- return
-
- # For 5xx or other unexpected codes, treat it as transient and retry
- logger.warning(f"Server error {response.status_code}, retrying.")
- except httpx.RequestError as exc:
- # Network or other I/O error, we'll retry
- logger.warning(f"Request failed: {exc}")
-
- # If we reach here, we need to retry or give up
- if attempt >= self.max_retries:
- logger.error("Max retries reached, giving up on this batch.")
- return
-
- # Exponential backoff + jitter
- sleep_time = delay + random.uniform(0, 0.1 * delay) # 10% jitter
- time.sleep(sleep_time)
- delay = min(delay * 2, self.max_delay)
-
- def close(self):
- """Close the underlying HTTP client."""
- self._client.close()
-
-
-class BatchTraceProcessor(TracingProcessor):
- """Some implementation notes:
- 1. Using Queue, which is thread-safe.
- 2. Using a background thread to export spans, to minimize any performance issues.
- 3. Spans are stored in memory until they are exported.
- """
-
- def __init__(
- self,
- exporter: TracingExporter,
- max_queue_size: int = 8192,
- max_batch_size: int = 128,
- schedule_delay: float = 5.0,
- export_trigger_ratio: float = 0.7,
- ):
- """
- Args:
- exporter: The exporter to use.
- max_queue_size: The maximum number of spans to store in the queue. After this, we will
- start dropping spans.
- max_batch_size: The maximum number of spans to export in a single batch.
- schedule_delay: The delay between checks for new spans to export.
- export_trigger_ratio: The ratio of the queue size at which we will trigger an export.
- """
- self._exporter = exporter
- self._queue: queue.Queue[Trace | Span[Any]] = queue.Queue(maxsize=max_queue_size)
- self._max_queue_size = max_queue_size
- self._max_batch_size = max_batch_size
- self._schedule_delay = schedule_delay
- self._shutdown_event = threading.Event()
-
- # The queue size threshold at which we export immediately.
- self._export_trigger_size = int(max_queue_size * export_trigger_ratio)
-
- # Track when we next *must* perform a scheduled export
- self._next_export_time = time.time() + self._schedule_delay
-
- self._worker_thread = threading.Thread(target=self._run, daemon=True)
- self._worker_thread.start()
-
- def on_trace_start(self, trace: Trace) -> None:
- try:
- self._queue.put_nowait(trace)
- except queue.Full:
- logger.warning("Queue is full, dropping trace.")
-
- def on_trace_end(self, trace: Trace) -> None:
- # We send traces via on_trace_start, so we don't need to do anything here.
- pass
-
- def on_span_start(self, span: Span[Any]) -> None:
- # We send spans via on_span_end, so we don't need to do anything here.
- pass
-
- def on_span_end(self, span: Span[Any]) -> None:
- try:
- self._queue.put_nowait(span)
- except queue.Full:
- logger.warning("Queue is full, dropping span.")
-
- def shutdown(self, timeout: float | None = None):
- """
- Called when the application stops. We signal our thread to stop, then join it.
- """
- self._shutdown_event.set()
- self._worker_thread.join(timeout=timeout)
-
- def force_flush(self):
- """
- Forces an immediate flush of all queued spans.
- """
- self._export_batches(force=True)
-
- def _run(self):
- while not self._shutdown_event.is_set():
- current_time = time.time()
- queue_size = self._queue.qsize()
-
- # If it's time for a scheduled flush or queue is above the trigger threshold
- if current_time >= self._next_export_time or queue_size >= self._export_trigger_size:
- self._export_batches(force=False)
- # Reset the next scheduled flush time
- self._next_export_time = time.time() + self._schedule_delay
- else:
- # Sleep a short interval so we don't busy-wait.
- time.sleep(0.2)
-
- # Final drain after shutdown
- self._export_batches(force=True)
-
- def _export_batches(self, force: bool = False):
- """Drains the queue and exports in batches. If force=True, export everything.
- Otherwise, export up to `max_batch_size` repeatedly until the queue is empty or below a
- certain threshold.
- """
- while True:
- items_to_export: list[Span[Any] | Trace] = []
-
- # Gather a batch of spans up to max_batch_size
- while not self._queue.empty() and (
- force or len(items_to_export) < self._max_batch_size
- ):
- try:
- items_to_export.append(self._queue.get_nowait())
- except queue.Empty:
- # Another thread might have emptied the queue between checks
- break
-
- # If we collected nothing, we're done
- if not items_to_export:
- break
-
- # Export the batch
- self._exporter.export(items_to_export)
-
-
-# Create a shared global instance:
-_global_exporter = BackendSpanExporter()
-_global_processor = BatchTraceProcessor(_global_exporter)
-
-
-def default_exporter() -> BackendSpanExporter:
- """The default exporter, which exports traces and spans to the backend in batches."""
- return _global_exporter
-
-
-def default_processor() -> BatchTraceProcessor:
- """The default processor, which exports traces and spans to the backend in batches."""
- return _global_processor
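
A `BatchTraceProcessor` tuned for local debugging: tiny batches, aggressive flushing, console output instead of the backend (imports assume the canonical `agents.tracing` package):

from agents.tracing import set_trace_processors
from agents.tracing.processors import BatchTraceProcessor, ConsoleSpanExporter

processor = BatchTraceProcessor(
    ConsoleSpanExporter(),
    max_queue_size=256,        # beyond this, on_span_end drops spans
    max_batch_size=16,
    schedule_delay=0.5,        # flush at least twice per second
    export_trigger_ratio=0.5,  # or as soon as the queue is half full
)
set_trace_processors([processor])
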
diff --git a/tests/src/agents/tracing/scope.py b/tests/src/agents/tracing/scope.py
deleted file mode 100644
index 9ccd9f87..00000000
--- a/tests/src/agents/tracing/scope.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Holds the current active span
-import contextvars
-from typing import TYPE_CHECKING, Any
-
-from .logger import logger
-
-if TYPE_CHECKING:
- from .spans import Span
- from .traces import Trace
-
-_current_span: contextvars.ContextVar["Span[Any] | None"] = contextvars.ContextVar(
- "current_span", default=None
-)
-
-_current_trace: contextvars.ContextVar["Trace | None"] = contextvars.ContextVar(
- "current_trace", default=None
-)
-
-
-class Scope:
- @classmethod
- def get_current_span(cls) -> "Span[Any] | None":
- return _current_span.get()
-
- @classmethod
- def set_current_span(cls, span: "Span[Any] | None") -> "contextvars.Token[Span[Any] | None]":
- return _current_span.set(span)
-
- @classmethod
- def reset_current_span(cls, token: "contextvars.Token[Span[Any] | None]") -> None:
- _current_span.reset(token)
-
- @classmethod
- def get_current_trace(cls) -> "Trace | None":
- return _current_trace.get()
-
- @classmethod
- def set_current_trace(cls, trace: "Trace | None") -> "contextvars.Token[Trace | None]":
- logger.debug(f"Setting current trace: {trace.trace_id if trace else None}")
- return _current_trace.set(trace)
-
- @classmethod
- def reset_current_trace(cls, token: "contextvars.Token[Trace | None]") -> None:
- logger.debug("Resetting current trace")
- _current_trace.reset(token)
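
`Scope` is a thin veneer over `contextvars`; the token discipline it preserves is the standard one, sketched standalone here:

import contextvars
from typing import Optional

current: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("current", default=None)

token_a = current.set("span-a")
token_b = current.set("span-b")   # nested deeper
assert current.get() == "span-b"
current.reset(token_b)            # unwind the inner level first...
assert current.get() == "span-a"  # ...restoring the previous value, not None
current.reset(token_a)
assert current.get() is None
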
diff --git a/tests/src/agents/tracing/setup.py b/tests/src/agents/tracing/setup.py
deleted file mode 100644
index bc340c9f..00000000
--- a/tests/src/agents/tracing/setup.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from __future__ import annotations
-
-import os
-import threading
-from typing import Any
-
-from . import util
-from .logger import logger
-from .processor_interface import TracingProcessor
-from .scope import Scope
-from .spans import NoOpSpan, Span, SpanImpl, TSpanData
-from .traces import NoOpTrace, Trace, TraceImpl
-
-
-class SynchronousMultiTracingProcessor(TracingProcessor):
- """
- Forwards all calls to a list of TracingProcessors, in order of registration.
- """
-
- def __init__(self):
- # Using a tuple to avoid race conditions when iterating over processors
- self._processors: tuple[TracingProcessor, ...] = ()
- self._lock = threading.Lock()
-
- def add_tracing_processor(self, tracing_processor: TracingProcessor):
- """
- Add a processor to the list of processors. Each processor will receive all traces/spans.
- """
- with self._lock:
- self._processors += (tracing_processor,)
-
- def set_processors(self, processors: list[TracingProcessor]):
- """
- Set the list of processors. This will replace the current list of processors.
- """
- with self._lock:
- self._processors = tuple(processors)
-
- def on_trace_start(self, trace: Trace) -> None:
- """
- Called when a trace is started.
- """
- for processor in self._processors:
- processor.on_trace_start(trace)
-
- def on_trace_end(self, trace: Trace) -> None:
- """
- Called when a trace is finished.
- """
- for processor in self._processors:
- processor.on_trace_end(trace)
-
- def on_span_start(self, span: Span[Any]) -> None:
- """
- Called when a span is started.
- """
- for processor in self._processors:
- processor.on_span_start(span)
-
- def on_span_end(self, span: Span[Any]) -> None:
- """
- Called when a span is finished.
- """
- for processor in self._processors:
- processor.on_span_end(span)
-
- def shutdown(self) -> None:
- """
- Called when the application stops.
- """
- for processor in self._processors:
- logger.debug(f"Shutting down trace processor {processor}")
- processor.shutdown()
-
- def force_flush(self):
- """
- Force the processors to flush their buffers.
- """
- for processor in self._processors:
- processor.force_flush()
-
-
-class TraceProvider:
- def __init__(self):
- self._multi_processor = SynchronousMultiTracingProcessor()
- self._disabled = os.environ.get("OPENAI_AGENTS_DISABLE_TRACING", "false").lower() in (
- "true",
- "1",
- )
-
- def register_processor(self, processor: TracingProcessor):
- """
- Add a processor to the list of processors. Each processor will receive all traces/spans.
- """
- self._multi_processor.add_tracing_processor(processor)
-
- def set_processors(self, processors: list[TracingProcessor]):
- """
- Set the list of processors. This will replace the current list of processors.
- """
- self._multi_processor.set_processors(processors)
-
- def get_current_trace(self) -> Trace | None:
- """
- Returns the currently active trace, if any.
- """
- return Scope.get_current_trace()
-
- def get_current_span(self) -> Span[Any] | None:
- """
- Returns the currently active span, if any.
- """
- return Scope.get_current_span()
-
- def set_disabled(self, disabled: bool) -> None:
- """
- Set whether tracing is disabled.
- """
- self._disabled = disabled
-
- def create_trace(
- self,
- name: str,
- trace_id: str | None = None,
- group_id: str | None = None,
- metadata: dict[str, Any] | None = None,
- disabled: bool = False,
- ) -> Trace:
- """
- Create a new trace.
- """
- if self._disabled or disabled:
- logger.debug(f"Tracing is disabled. Not creating trace {name}")
- return NoOpTrace()
-
- trace_id = trace_id or util.gen_trace_id()
-
- logger.debug(f"Creating trace {name} with id {trace_id}")
-
- return TraceImpl(
- name=name,
- trace_id=trace_id,
- group_id=group_id,
- metadata=metadata,
- processor=self._multi_processor,
- )
-
- def create_span(
- self,
- span_data: TSpanData,
- span_id: str | None = None,
- parent: Trace | Span[Any] | None = None,
- disabled: bool = False,
- ) -> Span[TSpanData]:
- """
- Create a new span.
- """
- if self._disabled or disabled:
- logger.debug(f"Tracing is disabled. Not creating span {span_data}")
- return NoOpSpan(span_data)
-
- if not parent:
- current_span = Scope.get_current_span()
- current_trace = Scope.get_current_trace()
- if current_trace is None:
- logger.error(
- "No active trace. Make sure to start a trace with `trace()` first"
- "Returning NoOpSpan."
- )
- return NoOpSpan(span_data)
- elif isinstance(current_trace, NoOpTrace) or isinstance(current_span, NoOpSpan):
- logger.debug(
- f"Parent {current_span} or {current_trace} is no-op, returning NoOpSpan"
- )
- return NoOpSpan(span_data)
-
- parent_id = current_span.span_id if current_span else None
- trace_id = current_trace.trace_id
-
- elif isinstance(parent, Trace):
- if isinstance(parent, NoOpTrace):
- logger.debug(f"Parent {parent} is no-op, returning NoOpSpan")
- return NoOpSpan(span_data)
- trace_id = parent.trace_id
- parent_id = None
- elif isinstance(parent, Span):
- if isinstance(parent, NoOpSpan):
- logger.debug(f"Parent {parent} is no-op, returning NoOpSpan")
- return NoOpSpan(span_data)
- parent_id = parent.span_id
- trace_id = parent.trace_id
-
- logger.debug(f"Creating span {span_data} with id {span_id}")
-
- return SpanImpl(
- trace_id=trace_id,
- span_id=span_id,
- parent_id=parent_id,
- processor=self._multi_processor,
- span_data=span_data,
- )
-
- def shutdown(self) -> None:
- try:
- logger.debug("Shutting down trace provider")
- self._multi_processor.shutdown()
- except Exception as e:
- logger.error(f"Error shutting down trace provider: {e}")
-
-
-GLOBAL_TRACE_PROVIDER = TraceProvider()
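
The NoOp fallbacks above mean span creation degrades gracefully instead of raising, and setting `OPENAI_AGENTS_DISABLE_TRACING=1` turns the whole provider off. A sketch (imports assume the canonical `agents.tracing` exports):

from agents.tracing import custom_span, trace

span = custom_span("orphan")  # no trace() active: logged as an error, NoOpSpan returned
with span:
    pass                      # all methods are no-ops; nothing is exported

with trace("demo"):
    with custom_span("real-span"):  # parented to the "demo" trace
        pass
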
diff --git a/tests/src/agents/tracing/span_data.py b/tests/src/agents/tracing/span_data.py
deleted file mode 100644
index 5e5d38cb..00000000
--- a/tests/src/agents/tracing/span_data.py
+++ /dev/null
@@ -1,188 +0,0 @@
-from __future__ import annotations
-
-import abc
-from collections.abc import Mapping, Sequence
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
- from openai.types.responses import Response, ResponseInputItemParam
-
-
-class SpanData(abc.ABC):
- @abc.abstractmethod
- def export(self) -> dict[str, Any]:
- pass
-
- @property
- @abc.abstractmethod
- def type(self) -> str:
- pass
-
-
-class AgentSpanData(SpanData):
- __slots__ = ("name", "handoffs", "tools", "output_type")
-
- def __init__(
- self,
- name: str,
- handoffs: list[str] | None = None,
- tools: list[str] | None = None,
- output_type: str | None = None,
- ):
- self.name = name
- self.handoffs: list[str] | None = handoffs
- self.tools: list[str] | None = tools
- self.output_type: str | None = output_type
-
- @property
- def type(self) -> str:
- return "agent"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "name": self.name,
- "handoffs": self.handoffs,
- "tools": self.tools,
- "output_type": self.output_type,
- }
-
-
-class FunctionSpanData(SpanData):
- __slots__ = ("name", "input", "output")
-
- def __init__(self, name: str, input: str | None, output: str | None):
- self.name = name
- self.input = input
- self.output = output
-
- @property
- def type(self) -> str:
- return "function"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "name": self.name,
- "input": self.input,
- "output": self.output,
- }
-
-
-class GenerationSpanData(SpanData):
- __slots__ = (
- "input",
- "output",
- "model",
- "model_config",
- "usage",
- )
-
- def __init__(
- self,
- input: Sequence[Mapping[str, Any]] | None = None,
- output: Sequence[Mapping[str, Any]] | None = None,
- model: str | None = None,
- model_config: Mapping[str, Any] | None = None,
- usage: dict[str, Any] | None = None,
- ):
- self.input = input
- self.output = output
- self.model = model
- self.model_config = model_config
- self.usage = usage
-
- @property
- def type(self) -> str:
- return "generation"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "input": self.input,
- "output": self.output,
- "model": self.model,
- "model_config": self.model_config,
- "usage": self.usage,
- }
-
-
-class ResponseSpanData(SpanData):
- __slots__ = ("response", "input")
-
- def __init__(
- self,
- response: Response | None = None,
- input: str | list[ResponseInputItemParam] | None = None,
- ) -> None:
- self.response = response
- # This is not used by the OpenAI trace processors, but is useful for other tracing
- # processor implementations
- self.input = input
-
- @property
- def type(self) -> str:
- return "response"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "response_id": self.response.id if self.response else None,
- }
-
-
-class HandoffSpanData(SpanData):
- __slots__ = ("from_agent", "to_agent")
-
- def __init__(self, from_agent: str | None, to_agent: str | None):
- self.from_agent = from_agent
- self.to_agent = to_agent
-
- @property
- def type(self) -> str:
- return "handoff"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "from_agent": self.from_agent,
- "to_agent": self.to_agent,
- }
-
-
-class CustomSpanData(SpanData):
- __slots__ = ("name", "data")
-
- def __init__(self, name: str, data: dict[str, Any]):
- self.name = name
- self.data = data
-
- @property
- def type(self) -> str:
- return "custom"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "name": self.name,
- "data": self.data,
- }
-
-
-class GuardrailSpanData(SpanData):
- __slots__ = ("name", "triggered")
-
- def __init__(self, name: str, triggered: bool = False):
- self.name = name
- self.triggered = triggered
-
- @property
- def type(self) -> str:
- return "guardrail"
-
- def export(self) -> dict[str, Any]:
- return {
- "type": self.type,
- "name": self.name,
- "triggered": self.triggered,
- }
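
`SpanData` is a small contract: a `type` tag plus `export()`. `CustomSpanData` covers most needs, but a dedicated payload is one short subclass; `CacheSpanData` below is made up, and the import path assumes the canonical module under `src/agents`:

from typing import Any

from agents.tracing.span_data import SpanData

class CacheSpanData(SpanData):
    __slots__ = ("key", "hit")

    def __init__(self, key: str, hit: bool):
        self.key = key
        self.hit = hit

    @property
    def type(self) -> str:
        return "cache"

    def export(self) -> dict[str, Any]:
        return {"type": self.type, "key": self.key, "hit": self.hit}
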
diff --git a/tests/src/agents/tracing/spans.py b/tests/src/agents/tracing/spans.py
deleted file mode 100644
index d682a9a0..00000000
--- a/tests/src/agents/tracing/spans.py
+++ /dev/null
@@ -1,264 +0,0 @@
-from __future__ import annotations
-
-import abc
-import contextvars
-from typing import Any, Generic, TypeVar
-
-from typing_extensions import TypedDict
-
-from . import util
-from .logger import logger
-from .processor_interface import TracingProcessor
-from .scope import Scope
-from .span_data import SpanData
-
-TSpanData = TypeVar("TSpanData", bound=SpanData)
-
-
-class SpanError(TypedDict):
- message: str
- data: dict[str, Any] | None
-
-
-class Span(abc.ABC, Generic[TSpanData]):
- @property
- @abc.abstractmethod
- def trace_id(self) -> str:
- pass
-
- @property
- @abc.abstractmethod
- def span_id(self) -> str:
- pass
-
- @property
- @abc.abstractmethod
- def span_data(self) -> TSpanData:
- pass
-
- @abc.abstractmethod
- def start(self, mark_as_current: bool = False):
- """
- Start the span.
-
- Args:
- mark_as_current: If true, the span will be marked as the current span.
- """
- pass
-
- @abc.abstractmethod
- def finish(self, reset_current: bool = False) -> None:
- """
- Finish the span.
-
- Args:
- reset_current: If true, the current-span contextvar will be reset to the previous span.
- """
- pass
-
- @abc.abstractmethod
- def __enter__(self) -> Span[TSpanData]:
- pass
-
- @abc.abstractmethod
- def __exit__(self, exc_type, exc_val, exc_tb):
- pass
-
- @property
- @abc.abstractmethod
- def parent_id(self) -> str | None:
- pass
-
- @abc.abstractmethod
- def set_error(self, error: SpanError) -> None:
- pass
-
- @property
- @abc.abstractmethod
- def error(self) -> SpanError | None:
- pass
-
- @abc.abstractmethod
- def export(self) -> dict[str, Any] | None:
- pass
-
- @property
- @abc.abstractmethod
- def started_at(self) -> str | None:
- pass
-
- @property
- @abc.abstractmethod
- def ended_at(self) -> str | None:
- pass
-
-
-class NoOpSpan(Span[TSpanData]):
- __slots__ = ("_span_data", "_prev_span_token")
-
- def __init__(self, span_data: TSpanData):
- self._span_data = span_data
- self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None
-
- @property
- def trace_id(self) -> str:
- return "no-op"
-
- @property
- def span_id(self) -> str:
- return "no-op"
-
- @property
- def span_data(self) -> TSpanData:
- return self._span_data
-
- @property
- def parent_id(self) -> str | None:
- return None
-
- def start(self, mark_as_current: bool = False):
- if mark_as_current:
- self._prev_span_token = Scope.set_current_span(self)
-
- def finish(self, reset_current: bool = False) -> None:
- if reset_current and self._prev_span_token is not None:
- Scope.reset_current_span(self._prev_span_token)
- self._prev_span_token = None
-
- def __enter__(self) -> Span[TSpanData]:
- self.start(mark_as_current=True)
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- reset_current = True
- if exc_type is GeneratorExit:
- logger.debug("GeneratorExit, skipping span reset")
- reset_current = False
-
- self.finish(reset_current=reset_current)
-
- def set_error(self, error: SpanError) -> None:
- pass
-
- @property
- def error(self) -> SpanError | None:
- return None
-
- def export(self) -> dict[str, Any] | None:
- return None
-
- @property
- def started_at(self) -> str | None:
- return None
-
- @property
- def ended_at(self) -> str | None:
- return None
-
-
-class SpanImpl(Span[TSpanData]):
- __slots__ = (
- "_trace_id",
- "_span_id",
- "_parent_id",
- "_started_at",
- "_ended_at",
- "_error",
- "_prev_span_token",
- "_processor",
- "_span_data",
- )
-
- def __init__(
- self,
- trace_id: str,
- span_id: str | None,
- parent_id: str | None,
- processor: TracingProcessor,
- span_data: TSpanData,
- ):
- self._trace_id = trace_id
- self._span_id = span_id or util.gen_span_id()
- self._parent_id = parent_id
- self._started_at: str | None = None
- self._ended_at: str | None = None
- self._processor = processor
- self._error: SpanError | None = None
- self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None
- self._span_data = span_data
-
- @property
- def trace_id(self) -> str:
- return self._trace_id
-
- @property
- def span_id(self) -> str:
- return self._span_id
-
- @property
- def span_data(self) -> TSpanData:
- return self._span_data
-
- @property
- def parent_id(self) -> str | None:
- return self._parent_id
-
- def start(self, mark_as_current: bool = False):
- if self.started_at is not None:
- logger.warning("Span already started")
- return
-
- self._started_at = util.time_iso()
- self._processor.on_span_start(self)
- if mark_as_current:
- self._prev_span_token = Scope.set_current_span(self)
-
- def finish(self, reset_current: bool = False) -> None:
- if self.ended_at is not None:
- logger.warning("Span already finished")
- return
-
- self._ended_at = util.time_iso()
- self._processor.on_span_end(self)
- if reset_current and self._prev_span_token is not None:
- Scope.reset_current_span(self._prev_span_token)
- self._prev_span_token = None
-
- def __enter__(self) -> Span[TSpanData]:
- self.start(mark_as_current=True)
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- reset_current = True
- if exc_type is GeneratorExit:
- logger.debug("GeneratorExit, skipping span reset")
- reset_current = False
-
- self.finish(reset_current=reset_current)
-
- def set_error(self, error: SpanError) -> None:
- self._error = error
-
- @property
- def error(self) -> SpanError | None:
- return self._error
-
- @property
- def started_at(self) -> str | None:
- return self._started_at
-
- @property
- def ended_at(self) -> str | None:
- return self._ended_at
-
- def export(self) -> dict[str, Any] | None:
- return {
- "object": "trace.span",
- "id": self.span_id,
- "trace_id": self.trace_id,
- "parent_id": self._parent_id,
- "started_at": self._started_at,
- "ended_at": self._ended_at,
- "span_data": self.span_data.export(),
- "error": self._error,
- }
diff --git a/tests/src/agents/tracing/traces.py b/tests/src/agents/tracing/traces.py
deleted file mode 100644
index bf3b43df..00000000
--- a/tests/src/agents/tracing/traces.py
+++ /dev/null
@@ -1,195 +0,0 @@
-from __future__ import annotations
-
-import abc
-import contextvars
-from typing import Any
-
-from . import util
-from .logger import logger
-from .processor_interface import TracingProcessor
-from .scope import Scope
-
-
-class Trace:
- """
- A trace is the root level object that tracing creates. It represents a logical "workflow".
- """
-
- @abc.abstractmethod
- def __enter__(self) -> Trace:
- pass
-
- @abc.abstractmethod
- def __exit__(self, exc_type, exc_val, exc_tb):
- pass
-
- @abc.abstractmethod
- def start(self, mark_as_current: bool = False):
- """
- Start the trace.
-
- Args:
- mark_as_current: If true, the trace will be marked as the current trace.
- """
- pass
-
- @abc.abstractmethod
- def finish(self, reset_current: bool = False):
- """
- Finish the trace.
-
- Args:
-            reset_current: If true, the trace will stop being the current trace, and
-                the previous current trace will be restored.
- pass
-
- @property
- @abc.abstractmethod
- def trace_id(self) -> str:
- """
- The trace ID.
- """
- pass
-
- @property
- @abc.abstractmethod
- def name(self) -> str:
- """
- The name of the workflow being traced.
- """
- pass
-
- @abc.abstractmethod
- def export(self) -> dict[str, Any] | None:
- """
- Export the trace as a dictionary.
- """
- pass
-
-
-class NoOpTrace(Trace):
- """
- A no-op trace that will not be recorded.
- """
-
- def __init__(self):
- self._started = False
- self._prev_context_token: contextvars.Token[Trace | None] | None = None
-
- def __enter__(self) -> Trace:
- if self._started:
- if not self._prev_context_token:
- logger.error("Trace already started but no context token set")
- return self
-
- self._started = True
- self.start(mark_as_current=True)
-
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- self.finish(reset_current=True)
-
- def start(self, mark_as_current: bool = False):
- if mark_as_current:
- self._prev_context_token = Scope.set_current_trace(self)
-
- def finish(self, reset_current: bool = False):
- if reset_current and self._prev_context_token is not None:
- Scope.reset_current_trace(self._prev_context_token)
- self._prev_context_token = None
-
- @property
- def trace_id(self) -> str:
- return "no-op"
-
- @property
- def name(self) -> str:
- return "no-op"
-
- def export(self) -> dict[str, Any] | None:
- return None
-
-
-NO_OP_TRACE = NoOpTrace()
-
-
-class TraceImpl(Trace):
- """
- A trace that will be recorded by the tracing library.
- """
-
- __slots__ = (
- "_name",
- "_trace_id",
- "group_id",
- "metadata",
- "_prev_context_token",
- "_processor",
- "_started",
- )
-
- def __init__(
- self,
- name: str,
- trace_id: str | None,
- group_id: str | None,
- metadata: dict[str, Any] | None,
- processor: TracingProcessor,
- ):
- self._name = name
- self._trace_id = trace_id or util.gen_trace_id()
- self.group_id = group_id
- self.metadata = metadata
- self._prev_context_token: contextvars.Token[Trace | None] | None = None
- self._processor = processor
- self._started = False
-
- @property
- def trace_id(self) -> str:
- return self._trace_id
-
- @property
- def name(self) -> str:
- return self._name
-
- def start(self, mark_as_current: bool = False):
- if self._started:
- return
-
- self._started = True
- self._processor.on_trace_start(self)
-
- if mark_as_current:
- self._prev_context_token = Scope.set_current_trace(self)
-
- def finish(self, reset_current: bool = False):
- if not self._started:
- return
-
- self._processor.on_trace_end(self)
-
- if reset_current and self._prev_context_token is not None:
- Scope.reset_current_trace(self._prev_context_token)
- self._prev_context_token = None
-
- def __enter__(self) -> Trace:
- if self._started:
- if not self._prev_context_token:
- logger.error("Trace already started but no context token set")
- return self
-
- self.start(mark_as_current=True)
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- self.finish(reset_current=exc_type is not GeneratorExit)
-
- def export(self) -> dict[str, Any] | None:
- return {
- "object": "trace",
- "id": self.trace_id,
- "workflow_name": self.name,
- "group_id": self.group_id,
- "metadata": self.metadata,
- }
diff --git a/tests/src/agents/tracing/util.py b/tests/src/agents/tracing/util.py
deleted file mode 100644
index 3e5cad90..00000000
--- a/tests/src/agents/tracing/util.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import uuid
-from datetime import datetime, timezone
-
-
-def time_iso() -> str:
- """Returns the current time in ISO 8601 format."""
- return datetime.now(timezone.utc).isoformat()
-
-
-def gen_trace_id() -> str:
- """Generates a new trace ID."""
- return f"trace_{uuid.uuid4().hex}"
-
-
-def gen_span_id() -> str:
- """Generates a new span ID."""
- return f"span_{uuid.uuid4().hex[:24]}"
diff --git a/tests/src/agents/usage.py b/tests/src/agents/usage.py
deleted file mode 100644
index 23d989b4..00000000
--- a/tests/src/agents/usage.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Usage:
- requests: int = 0
- """Total requests made to the LLM API."""
-
- input_tokens: int = 0
- """Total input tokens sent, across all requests."""
-
- output_tokens: int = 0
- """Total output tokens received, across all requests."""
-
- total_tokens: int = 0
- """Total tokens sent and received, across all requests."""
-
- def add(self, other: "Usage") -> None:
- self.requests += other.requests if other.requests else 0
- self.input_tokens += other.input_tokens if other.input_tokens else 0
- self.output_tokens += other.output_tokens if other.output_tokens else 0
- self.total_tokens += other.total_tokens if other.total_tokens else 0
diff --git a/tests/src/agents/version.py b/tests/src/agents/version.py
deleted file mode 100644
index a0b7e9be..00000000
--- a/tests/src/agents/version.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import importlib.metadata
-
-try:
- __version__ = importlib.metadata.version("agents")
-except importlib.metadata.PackageNotFoundError:
- # Fallback if running from source without being installed
- __version__ = "0.0.0"
diff --git a/tests/src/openai_agents.egg-info/PKG-INFO b/tests/src/openai_agents.egg-info/PKG-INFO
deleted file mode 100644
index ebf2d7c2..00000000
--- a/tests/src/openai_agents.egg-info/PKG-INFO
+++ /dev/null
@@ -1,217 +0,0 @@
-Metadata-Version: 2.2
-Name: openai-agents
-Version: 0.0.1
-Summary: OpenAI Agents SDK
-Author-email: OpenAI
-Project-URL: Homepage, https://github.com/openai/openai-agents-python
-Project-URL: Repository, https://github.com/openai/openai-agents-python
-Classifier: Typing :: Typed
-Classifier: Intended Audience :: Developers
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: Information Technology
-Classifier: Operating System :: OS Independent
-Classifier: Operating System :: POSIX
-Classifier: Operating System :: MacOS
-Classifier: Operating System :: POSIX :: Linux
-Classifier: Operating System :: Microsoft :: Windows
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.9
-Description-Content-Type: text/markdown
-Requires-Dist: openai@ {root:parent:uri}/openai-1.30.1-py3-none-any.whl
-Requires-Dist: pydantic<3,>=2.10
-Requires-Dist: griffe<2,>=1.5.6
-Requires-Dist: typing-extensions<5,>=4.12.2
-Requires-Dist: requests<3,>=2.0
-Requires-Dist: types-requests<3,>=2.0
-
-# OpenAI Agents SDK
-
-The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows.
-
-### Core concepts:
-1. [**Agents,**](docs/agents.md) which are LLMs configured with instructions, tools, guardrails, and handoffs
-2. [**Handoffs,**](docs/handoffs.md) which allow agents to transfer control to other agents for specific tasks
-3. [**Guardrails,**](docs/guardrails.md) which make it easy to watch an agent's execution and validate its inputs/outputs
-4. [**Tracing,**](docs/tracing.md) which automatically captures the entire agentic run, allowing you to view, debug, and optimize your workflows
-
-Explore examples of the SDK in action in the [examples](examples) directory.
-
-## Using the SDK
-
-1. Set up a Python environment
-
-```
-python -m venv env
-source env/bin/activate
-```
-
-2. Install Agents SDK
-
-```
-pip install git+ssh://git@github.com/openai/agentsdk_prototype.git#subdirectory=agents
-```
-
-## Development (only needed if you want to edit the SDK or examples)
-
-0. Ensure you have [`uv`](https://docs.astral.sh/uv/) installed.
-
-```bash
-uv --version
-```
-
-1. Install dependencies and set up the virtual environment
-
-```bash
-uv sync
-```
-
-2. Install all extras and workspace packages
-
-```bash
-uv sync --all-extras --all-packages
-```
-
-3. Activate the virtual environment
-
-```bash
-source .venv/bin/activate
-```
-
-## Tests
-
-Make sure the virtual environment is activated first.
-
-```bash
-pytest
-```
-
-## Hello world example
-
-```py
-from agents.agent import Agent
-from agents.run import Runner
-import asyncio
-
-agent = Agent(
- name="Hello world",
- instructions="You are a helpful agent."
-)
-
-async def main():
- out = await Runner.run(agent, input="Hola, ¿cómo estás?")
- print(out)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-
-# The capital of the United States is Washington, D.C.
-```
-
-## Handoffs example
-
-```py
-from agents.agent import Agent
-from agents.run import Runner
-import asyncio
-
-spanish_agent = Agent(
- name="spanish_agent",
- instructions="You only speak Spanish.",
-)
-
-english_agent = Agent(
- name="english_agent",
- instructions="You only speak English",
-)
-
-triage_agent = Agent(
- name="triage_agent",
- instructions="Handoff to the appropriate agent based on the language of the request.",
- handoffs=[spanish_agent, english_agent],
-)
-
-
-async def main():
- out = await Runner.run(triage_agent, input="Hola, ¿cómo estás?")
- print(out)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-
-# ¡Hola! Estoy bien, gracias por preguntar. ¿Y tú, cómo estás?
-```
-
-## Functions example
-
-```python
-from agents.agent import Agent
-from agents.run import Runner
-import asyncio
-from agents.tool import function_tool
-
-
-@function_tool
-def get_weather(city: str) -> str:
- print(f"Getting weather for {city}")
- return f"The weather in {city} is sunny."
-
-
-agent = Agent(
- name="Hello world",
- instructions="You are a helpful agent.",
- tools=[get_weather],
-)
-
-
-async def main():
- out = await Runner.run(agent, input="What's the weather in Tokyo?")
- print(out.final_output)
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-For more complex systems, we recommend including detailed instructions about handoffs. We have a recommendation in `handoff.RECOMMENDED_PROMPT_PREFIX` that can be used to add these instructions to an agent.
-
-```py
-agent = Agent(
- ...,
- instructions=f"{handoff.RECOMMENDED_PROMPT_PREFIX}\n\n{instructions}"
-)
-```
-
-## The agent loop
-
-When you call `Runner.run()`, we run a loop until we get a final output.
-
-1. We call the LLM, using the model and settings on the agent, and the message history.
-2. The LLM returns a response, which may include tool calls.
-3. If the response has a final output (see below for more on this), we return it and end the loop.
-4. If the response has a handoff, we set the agent to the new agent and go back to step 1.
-5. We process the tool calls (if any) and append the tool response messages. Then we go to step 1.
-
-There is a `max_turns` parameter that you can use to limit the number of times the loop executes.
-
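-A minimal sketch of this loop (purely illustrative; `call_llm`, `extract_final_output`, and `execute_tool_calls` below are hypothetical stand-ins, not SDK APIs):
-
-```py
-def run_loop(agent, input_items, max_turns=10):
-    current_agent = agent
-    history = list(input_items)
-    for _ in range(max_turns):
-        # Steps 1-2: call the LLM with the agent's model/settings and the history.
-        response = call_llm(current_agent, history)
-        # Step 3: if the response contains a final output, return it.
-        final = extract_final_output(current_agent, response)
-        if final is not None:
-            return final
-        # Step 4: on a handoff, switch to the new agent and loop again.
-        if response.handoff is not None:
-            current_agent = response.handoff
-            continue
-        # Step 5: run the tool calls, append their result messages, and loop again.
-        history += execute_tool_calls(current_agent, response.tool_calls)
-    raise RuntimeError("max_turns exceeded")
-```
-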
-### Final output
-
-There are two ways to get a **final output**:
-
-1. If you set an `output_type` on the agent, the LLM is given a special tool called `final_output`. If it uses this tool, the output of the tool is the final output.
-2. If there's no `output_type`, then we assume the final output is a string. As soon as the LLM produces a message without any tool calls, that is considered the final output.
-
-As a result, the mental model for the agent loop is:
-
-1. If the current agent has an `output_type`, the loop runs until the agent uses that tool to return the final output.
-2. If the current agent does not have an `output_type`, the loop runs until the current agent produces a message without any tool calls.
-
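-For example, a sketch of the first case (the agent and schema names here are hypothetical; passing a Pydantic model as `output_type` follows the `Agent` configuration described above):
-
-```py
-from pydantic import BaseModel
-
-from agents.agent import Agent
-
-
-class WeatherAnswer(BaseModel):
-    city: str
-    summary: str
-
-
-# Because `output_type` is set, the loop runs until the LLM calls the
-# special `final_output` tool with a payload matching this schema.
-agent = Agent(
-    name="structured_agent",
-    instructions="Answer questions about the weather.",
-    output_type=WeatherAnswer,
-)
-```
-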
-## Common agent patterns
-
-There are a number of useful patterns in agentic apps. The [`examples/agent_patterns`](examples/agent_patterns) directory contains examples of several, and we recommend reading them.
diff --git a/tests/src/openai_agents.egg-info/SOURCES.txt b/tests/src/openai_agents.egg-info/SOURCES.txt
deleted file mode 100644
index 695ad1fc..00000000
--- a/tests/src/openai_agents.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-README.md
-pyproject.toml
-src/agents/__init__.py
-src/agents/_config.py
-src/agents/_debug.py
-src/agents/_run_impl.py
-src/agents/_utils.py
-src/agents/agent.py
-src/agents/agent_output.py
-src/agents/call_agent_tool.py
-src/agents/computer.py
-src/agents/exceptions.py
-src/agents/function_schema.py
-src/agents/guardrail.py
-src/agents/handoffs.py
-src/agents/items.py
-src/agents/lifecycle.py
-src/agents/logger.py
-src/agents/model_settings.py
-src/agents/result.py
-src/agents/run.py
-src/agents/run_context.py
-src/agents/strict_schema.py
-src/agents/tool.py
-src/agents/usage.py
-src/agents/version.py
-src/agents/extensions/__init__.py
-src/agents/extensions/handoff_filters.py
-src/agents/extensions/handoff_prompt.py
-src/agents/models/__init__.py
-src/agents/models/_openai_shared.py
-src/agents/models/fake_id.py
-src/agents/models/interface.py
-src/agents/models/map.py
-src/agents/models/openai_chatcompletions.py
-src/agents/models/openai_responses.py
-src/agents/tracing/__init__.py
-src/agents/tracing/create.py
-src/agents/tracing/logger.py
-src/agents/tracing/processor_interface.py
-src/agents/tracing/processors.py
-src/agents/tracing/scope.py
-src/agents/tracing/setup.py
-src/agents/tracing/span_data.py
-src/agents/tracing/spans.py
-src/agents/tracing/traces.py
-src/agents/tracing/util.py
-src/openai_agents.egg-info/PKG-INFO
-src/openai_agents.egg-info/SOURCES.txt
-src/openai_agents.egg-info/dependency_links.txt
-src/openai_agents.egg-info/requires.txt
-src/openai_agents.egg-info/top_level.txt
-tests/test_agent_config.py
-tests/test_agent_hooks.py
-tests/test_agent_runner.py
-tests/test_agent_runner_streamed.py
-tests/test_agent_tracing.py
-tests/test_config.py
-tests/test_doc_parsing.py
-tests/test_function_schema.py
-tests/test_function_tool.py
-tests/test_function_tool_decorator.py
-tests/test_global_hooks.py
-tests/test_guardrails.py
-tests/test_handoff_tool.py
-tests/test_items_helpers.py
-tests/test_max_turns.py
-tests/test_model_mapper.py
-tests/test_openai_chatcompletions_converter.py
-tests/test_openai_responses_converter.py
-tests/test_output_tool.py
-tests/test_responses.py
-tests/test_run_config.py
-tests/test_run_step_execution.py
-tests/test_run_step_processing.py
-tests/test_tool_converter.py
-tests/test_trace_processor.py
-tests/test_tracing.py
-tests/test_tracing_errors.py
-tests/test_tracing_errors_streamed.py
-tests/testing_processor.py
\ No newline at end of file
diff --git a/tests/src/openai_agents.egg-info/dependency_links.txt b/tests/src/openai_agents.egg-info/dependency_links.txt
deleted file mode 100644
index 8b137891..00000000
--- a/tests/src/openai_agents.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/src/openai_agents.egg-info/requires.txt b/tests/src/openai_agents.egg-info/requires.txt
deleted file mode 100644
index 3dbad2b8..00000000
--- a/tests/src/openai_agents.egg-info/requires.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-openai@ {root:parent:uri}/openai-1.30.1-py3-none-any.whl
-pydantic<3,>=2.10
-griffe<2,>=1.5.6
-typing-extensions<5,>=4.12.2
-requests<3,>=2.0
-types-requests<3,>=2.0
diff --git a/tests/src/openai_agents.egg-info/top_level.txt b/tests/src/openai_agents.egg-info/top_level.txt
deleted file mode 100644
index 4a33ff62..00000000
--- a/tests/src/openai_agents.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-agents
diff --git a/tests/test_config.py b/tests/test_config.py
index 8f37200a..dba854db 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -49,13 +49,16 @@ def test_resp_set_default_openai_client():
def test_set_default_openai_api():
- assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \
+ assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), (
"Default should be responses"
+ )
set_default_openai_api("chat_completions")
- assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), \
+ assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), (
"Should be chat completions model"
+ )
set_default_openai_api("responses")
- assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \
+ assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), (
"Should be responses model"
+ )