[gpt-oss] Small bug fixes for frontend #22512

Merged
merged 4 commits on Aug 12, 2025
56 changes: 42 additions & 14 deletions vllm/entrypoints/context.py
@@ -1,15 +1,20 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import logging
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Union

from openai_harmony import Message, Role, StreamState
from openai_harmony import Author, Message, Role, StreamState, TextContent

from vllm.entrypoints.harmony_utils import (
get_encoding, get_streamable_parser_for_assistant, render_for_completion)
from vllm.entrypoints.tool import Tool
from vllm.outputs import RequestOutput

if TYPE_CHECKING:
from mcp.client import ClientSession

logger = logging.getLogger(__name__)


@@ -71,6 +76,7 @@ def __init__(
def append_output(self, output) -> None:
if isinstance(output, RequestOutput):
output_token_ids = output.outputs[0].token_ids
self.parser = get_streamable_parser_for_assistant()
for token_id in output_token_ids:
self.parser.process(token_id)
output_msgs = self.parser.messages
@@ -106,19 +112,41 @@ async def call_tool(self) -> list[Message]:
def render_for_completion(self) -> list[int]:
return render_for_completion(self.messages)

async def call_search_tool(
self,
tool_session: Tool,
last_msg: Message,
) -> list[Message]:
return await tool_session.get_result(self)

async def call_python_tool(
self,
tool_session: Tool,
last_msg: Message,
) -> list[Message]:
return await tool_session.get_result(self)
async def call_search_tool(self, tool_session: Union["ClientSession",
Tool],
last_msg: Message) -> list[Message]:
if isinstance(tool_session, Tool):
return await tool_session.get_result(self)
tool_name = last_msg.recipient.split(".")[1]
args = json.loads(last_msg.content[0].text)
result = await tool_session.call_tool(tool_name, args)
result_str = result.content[0].text
content = TextContent(text=result_str)
author = Author(role=Role.TOOL, name=last_msg.recipient)
return [
Message(author=author, content=[content], recipient=Role.ASSISTANT)
]

async def call_python_tool(self, tool_session: Union["ClientSession",
Tool],
last_msg: Message) -> list[Message]:
if isinstance(tool_session, Tool):
return await tool_session.get_result(self)
param = {
"code": last_msg.content[0].text,
}
result = await tool_session.call_tool("python", param)
result_str = result.content[0].text

content = TextContent(text=result_str)
author = Author(role=Role.TOOL, name="python")

return [
Message(author=author,
content=[content],
channel=last_msg.channel,
recipient=Role.ASSISTANT)
]


class StreamingHarmonyContext(HarmonyContext):
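For reference, a minimal sketch of the MCP branch added to call_python_tool: the code to run is taken from the last assistant message, passed to the session's call_tool, and the text result is wrapped back into a harmony Message authored by the tool. FakeMCPSession and its result types below are illustrative stand-ins for an mcp.client.ClientSession, not part of this PR.

import asyncio
from dataclasses import dataclass

from openai_harmony import Author, Message, Role, TextContent


@dataclass
class FakeContent:
    text: str


@dataclass
class FakeResult:
    content: list[FakeContent]


class FakeMCPSession:
    """Illustrative stand-in for mcp.client.ClientSession."""

    async def call_tool(self, name: str, args: dict) -> FakeResult:
        # A real session would forward the call to the MCP tool server.
        return FakeResult(content=[FakeContent(text=f"{name} ran with {args}")])


async def run_python_via_mcp(session: FakeMCPSession,
                             last_msg: Message) -> list[Message]:
    # Mirrors the MCP branch of HarmonyContext.call_python_tool above.
    param = {"code": last_msg.content[0].text}
    result = await session.call_tool("python", param)
    content = TextContent(text=result.content[0].text)
    author = Author(role=Role.TOOL, name="python")
    return [
        Message(author=author,
                content=[content],
                channel=last_msg.channel,
                recipient=Role.ASSISTANT)
    ]


async def main() -> None:
    last_msg = Message(
        author=Author(role=Role.ASSISTANT, name="assistant"),
        content=[TextContent(text="print(1 + 1)")],
        recipient="python",
        channel="analysis",
    )
    tool_msgs = await run_python_via_mcp(FakeMCPSession(), last_msg)
    print(tool_msgs[0].content[0].text)


asyncio.run(main())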
5 changes: 3 additions & 2 deletions vllm/entrypoints/openai/protocol.py
@@ -19,8 +19,8 @@
# yapf: enable
from openai.types.responses import (ResponseFunctionToolCall,
ResponseInputItemParam, ResponseOutputItem,
ResponsePrompt, ResponseStatus,
ResponseTextConfig)
ResponsePrompt, ResponseReasoningItem,
ResponseStatus, ResponseTextConfig)
from openai.types.responses.response import ToolChoice
from openai.types.responses.tool import Tool
from openai.types.shared import Metadata, Reasoning
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],


ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
ResponseReasoningItem,
ResponseFunctionToolCall]


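Adding ResponseReasoningItem to the alias lets a reasoning item produced by one response be passed back as input to a later request. A small hedged sketch; the field values are illustrative:

from openai.types.responses import ResponseReasoningItem

# A reasoning item of the shape now accepted by ResponseInputOutputItem.
item = ResponseReasoningItem(
    id="rs_123",
    type="reasoning",
    summary=[],
)
print(item.model_dump_json())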
29 changes: 16 additions & 13 deletions vllm/entrypoints/openai/serving_responses.py
@@ -16,8 +16,7 @@
from openai import BaseModel
# yapf conflicts with isort for this block
# yapf: disable
from openai.types.responses import (ResponseContentPartDoneEvent,
ResponseCreatedEvent,
from openai.types.responses import (ResponseCreatedEvent,
ResponseFunctionToolCall,
ResponseInProgressEvent,
ResponseOutputItem,
@@ -54,7 +53,7 @@
# yapf: enable
from vllm.entrypoints.openai.serving_engine import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.tool_server import ToolServer
from vllm.entrypoints.tool_server import MCPToolServer, ToolServer
from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
from vllm.logger import init_logger
from vllm.outputs import CompletionOutput
@@ -238,6 +237,15 @@ async def create_responses(
if raw_request:
raw_request.state.request_metadata = request_metadata

if self.tool_server is not None and isinstance(
self.tool_server, MCPToolServer
) and (request.background or request.stream) and request.tools and any(
tool.type in ["web_search_preview", "code_interpreter"]
for tool in request.tools):
return self.create_error_response(
"MCP tool server is not supported in background mode and "
"streaming mode")

# Schedule the request and get the result generator.
generators: list[AsyncGenerator[ConversationContext, None]] = []

@@ -844,9 +852,13 @@ def _send_event(event: BaseModel):
type="reasoning",
content=[
ResponseReasoningTextContent(
text=previous_item.content[0].text),
text=previous_item.content[0].text,
type="reasoning_text",
),
],
status="completed",
id=current_item_id,
summary=[],
)
yield _send_event(
ResponseReasoningTextDoneEvent(
@@ -857,15 +869,6 @@ def _send_event(event: BaseModel):
content_index=current_content_index,
text=previous_item.content[0].text,
))
yield _send_event(
ResponseContentPartDoneEvent(
type="response.content_part.done",
item_id=current_item_id,
sequence_number=-1,
output_index=current_output_index,
content_index=current_content_index,
part=reasoning_item,
))
yield _send_event(
ResponseOutputItemDoneEvent(
type="response.output_item.done",
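To illustrate the new guard in create_responses: when the server-side tool server is an MCPToolServer, a streaming or background request that asks for the built-in web_search_preview or code_interpreter tools is now rejected up front. A hedged client-side sketch using the OpenAI SDK; the base URL and model name are illustrative:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# With an MCP tool server configured, this combination is now rejected with a
# clear error instead of failing later, since streaming/background execution
# is not supported for MCP-backed built-in tools.
response = client.responses.create(
    model="openai/gpt-oss-20b",
    input="Search the web for the latest vLLM release notes.",
    tools=[{"type": "web_search_preview"}],
    stream=True,
)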
15 changes: 13 additions & 2 deletions vllm/entrypoints/tool.py
@@ -2,7 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Optional

from openai_harmony import Message

from vllm.logger import init_logger

@@ -70,7 +72,16 @@ def __init__(self):
"gpt_oss is not installed, code interpreter is disabled")
return

self.python_tool = PythonTool()
# NOTE (Chen): as of gpt-oss 0.0.2, there is a bug in _make_response
# and we do the following monkey patch to fix it.
class PatchedGptOssPythonTool(PythonTool):

def _make_response(self,
output: str,
channel: Optional[str] = None) -> Message:
return super()._make_response(output)

self.python_tool = PatchedGptOssPythonTool()
logger.info_once("Code interpreter tool initialized")

async def get_result(self, context: "ConversationContext") -> Any:
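The fix above is a subclass-and-override monkey patch: the channel keyword that gpt-oss 0.0.2 mishandles is dropped before delegating to the parent implementation. A self-contained sketch of the same pattern, where ThirdPartyTool is an illustrative stand-in rather than the real gpt_oss PythonTool:

from typing import Optional


class ThirdPartyTool:
    """Stand-in for a dependency whose _make_response mishandles `channel`."""

    def _make_response(self, output: str, channel: Optional[str] = None) -> dict:
        # Pretend the upstream version misuses `channel` here.
        return {"output": output, "channel": channel}


class PatchedThirdPartyTool(ThirdPartyTool):

    def _make_response(self, output: str, channel: Optional[str] = None) -> dict:
        # Swallow the problematic argument and defer to the parent.
        return super()._make_response(output)


print(PatchedThirdPartyTool()._make_response("42", channel="analysis"))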
5 changes: 3 additions & 2 deletions vllm/entrypoints/tool_server.py
@@ -4,7 +4,7 @@
from contextlib import AbstractAsyncContextManager, asynccontextmanager
from typing import TYPE_CHECKING, Any, Optional

from openai_harmony import ToolNamespaceConfig
from openai_harmony import ToolDescription, ToolNamespaceConfig

from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
from vllm.logger import init_logger
@@ -105,7 +105,6 @@ def __init__(self):
self.harmony_tool_descriptions = {}

async def add_tool_server(self, server_url: str):
from mcp.types import ToolDescription
tool_urls = server_url.split(",")
self.harmony_tool_descriptions = {}
self.urls: dict[str, str] = {}
@@ -133,6 +132,8 @@ async def add_tool_server(self, server_url: str):
logger.warning(
"Tool %s already exists. Ignoring duplicate tool server %s",
tool_from_mcp.name, url)
logger.info("MCPToolServer initialized with tools: %s",
list(self.harmony_tool_descriptions.keys()))

def has_tool(self, tool_name: str):
return tool_name in self.harmony_tool_descriptions