Commit 3d09e59

[V1][Misc] Shorten FinishReason enum and use constant strings (#12760)
1 parent fcf2e3d commit 3d09e59

File tree

5 files changed: +25 -21 lines changed


vllm/v1/engine/__init__.py

+9 -3

@@ -14,11 +14,17 @@
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sampling_params import SamplingParams
 
+# These are possible values of RequestOutput.finish_reason,
+# so form part of the external API.
+FINISH_REASON_STRINGS = ("stop", "length", "abort")
 
-class RequestFinishedReason(enum.IntEnum):
+
+class FinishReason(enum.IntEnum):
     """
     Reason a request finished - stop, length, or abort.
 
+    Int rather than Str for more compact serialization.
+
     stop - a stop string was emitted
     length - max_tokens was consumed, or max_model_len was reached
     abort - aborted for another reason
@@ -29,7 +35,7 @@ class RequestFinishedReason(enum.IntEnum):
     ABORT = 2
 
     def __str__(self):
-        return self.name.lower()
+        return FINISH_REASON_STRINGS[self.value]
 
 
 @dataclass
@@ -62,7 +68,7 @@ class EngineCoreOutput(
     request_id: str
     new_token_ids: List[int]
     finished: bool
-    finish_reason: Optional[RequestFinishedReason] = None
+    finish_reason: Optional[FinishReason] = None
     stop_reason: Union[int, str, None] = None
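
Note: the net effect of this hunk is that FinishReason serializes compactly as an int while str() still yields the strings exposed through RequestOutput.finish_reason. A minimal standalone sketch (the enum is re-declared here for illustration rather than imported from vllm.v1.engine; STOP = 0 and LENGTH = 1 are assumed, consistent with ABORT = 2 and the ordering of FINISH_REASON_STRINGS):

import enum

FINISH_REASON_STRINGS = ("stop", "length", "abort")

class FinishReason(enum.IntEnum):
    STOP = 0
    LENGTH = 1
    ABORT = 2

    def __str__(self):
        # Index into the constant tuple instead of lower-casing the member name.
        return FINISH_REASON_STRINGS[self.value]

reason = FinishReason.LENGTH
assert int(reason) == 1          # compact form used for serialization
assert str(reason) == "length"   # external API string

Indexing the tuple by value also keeps the rename cheap: member names can change without affecting the externally visible strings.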

vllm/v1/engine/detokenizer.py

+3 -4

@@ -8,8 +8,7 @@
 from vllm.sampling_params import RequestOutputKind
 from vllm.transformers_utils.detokenizer_utils import (
     AnyTokenizer, convert_prompt_ids_to_tokens, detokenize_incrementally)
-from vllm.v1.engine import (EngineCoreOutput, EngineCoreRequest,
-                            RequestFinishedReason)
+from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason
 
 logger = init_logger(__name__)
 
@@ -19,7 +18,7 @@ class DetokenizerOutput:
     output_text: str
     token_ids: List[int]
     finished: bool
-    finish_reason: Optional[RequestFinishedReason] = None
+    finish_reason: Optional[FinishReason] = None
     stop_reason: Union[int, str, None] = None
 
 
@@ -148,7 +147,7 @@ def update_from_output(
                 stop_str, truncate_to = stop
                 if truncate_to != -1:
                     self.output_text = self.output_text[:truncate_to]
-                finish_reason = RequestFinishedReason.STOP
+                finish_reason = FinishReason.STOP
                 stop_reason = stop_str
 
        # TODO: handle stop_token_ids here too?
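
The only behavioral code this file touches is the stop-string branch; the rest is the rename. A simplified, self-contained sketch of that pattern (apply_stop is a hypothetical helper, not the vLLM detokenizer, and it assumes the FinishReason sketch above):

from typing import Optional, Tuple

def apply_stop(output_text: str,
               stop: Optional[Tuple[str, int]]
               ) -> Tuple[str, Optional[FinishReason], Optional[str]]:
    """If a stop string matched, truncate the text and report FinishReason.STOP."""
    if stop is None:
        return output_text, None, None
    stop_str, truncate_to = stop
    if truncate_to != -1:
        # Cut the detokenized text back to just before the stop string.
        output_text = output_text[:truncate_to]
    return output_text, FinishReason.STOP, stop_str

text, reason, stop_reason = apply_stop("Hello world STOP", ("STOP", 12))
assert text == "Hello world " and reason == FinishReason.STOP and stop_reason == "STOP"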

vllm/v1/metrics/loggers.py

+3 -3

@@ -9,7 +9,7 @@
 
 from vllm.config import ModelConfig
 from vllm.logger import init_logger
-from vllm.v1.engine import RequestFinishedReason
+from vllm.v1.engine import FinishReason
 from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
@@ -117,13 +117,13 @@ def __init__(self, model_config: ModelConfig):
             documentation="Number of generation tokens processed.",
             labelnames=labelnames).labels(*labelvalues)
 
-        self.counter_request_success: Dict[RequestFinishedReason,
+        self.counter_request_success: Dict[FinishReason,
                                            prometheus_client.Counter] = {}
         counter_request_success_base = prometheus_client.Counter(
             name="vllm:request_success_total",
             documentation="Count of successfully processed requests.",
             labelnames=labelnames + ["finished_reason"])
-        for reason in RequestFinishedReason:
+        for reason in FinishReason:
            self.counter_request_success[
                reason] = counter_request_success_base.labels(*(labelvalues +
                                                                [str(reason)]))
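
Since str(FinishReason.X) now goes through FINISH_REASON_STRINGS, the finished_reason label on vllm:request_success_total keeps the external values "stop", "length", and "abort". A minimal standalone sketch of the same per-reason counter pattern (hypothetical metric name and label values, assuming prometheus_client is installed and the FinishReason sketch above):

import prometheus_client

# One child counter per finish reason, keyed by the enum member.
base = prometheus_client.Counter(
    name="demo_request_success_total",
    documentation="Count of successfully processed requests.",
    labelnames=["model_name", "finished_reason"])

counters = {
    reason: base.labels("my-model", str(reason))
    for reason in FinishReason
}

# On request completion, bump the counter for its finish reason.
counters[FinishReason.STOP].inc()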

vllm/v1/metrics/stats.py

+3 -4

@@ -6,7 +6,7 @@
 
 if TYPE_CHECKING:
     from vllm.outputs import RequestOutput
-    from vllm.v1.engine import EngineCoreOutput, RequestFinishedReason
+    from vllm.v1.engine import EngineCoreOutput, FinishReason
 
 
 @dataclass
@@ -32,7 +32,7 @@ class RequestStateStats:
 class FinishedRequestStats:
     """Stats associated with a finished request."""
 
-    finish_reason: "RequestFinishedReason"
+    finish_reason: "FinishReason"
     num_prompt_tokens: int = 0
     num_generation_tokens: int = 0
 
@@ -74,8 +74,7 @@ def update_from_output(self, output: "EngineCoreOutput",
         request_state_stats.num_generation_tokens += num_new_generation_tokens
         request_state_stats.last_token_time = now
 
-    def update_from_finished_request(self,
-                                     finish_reason: "RequestFinishedReason",
+    def update_from_finished_request(self, finish_reason: "FinishReason",
                                      request_output: "RequestOutput",
                                      request_state_stats: RequestStateStats):
         self.finished_requests.append(
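
These hunks lean on the TYPE_CHECKING-only import plus string annotations, so the stats module never imports FinishReason at runtime while type checkers still see the real type. A condensed sketch of that pattern (IterationStatsSketch is hypothetical and uses a simplified signature; the real update_from_finished_request receives a RequestOutput and RequestStateStats):

from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List

if TYPE_CHECKING:
    # Type-only import: no runtime dependency on vllm.v1.engine.
    from vllm.v1.engine import FinishReason


@dataclass
class FinishedRequestStats:
    """Stats associated with a finished request."""

    finish_reason: "FinishReason"
    num_prompt_tokens: int = 0
    num_generation_tokens: int = 0


@dataclass
class IterationStatsSketch:
    """Hypothetical collector mirroring update_from_finished_request."""

    finished_requests: List[FinishedRequestStats] = field(default_factory=list)

    def update_from_finished_request(self, finish_reason: "FinishReason",
                                     num_prompt_tokens: int,
                                     num_generation_tokens: int) -> None:
        # Record one stats entry per finished request.
        self.finished_requests.append(
            FinishedRequestStats(finish_reason, num_prompt_tokens,
                                 num_generation_tokens))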

vllm/v1/request.py

+7 -7

@@ -6,7 +6,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import RequestMetrics
-from vllm.v1.engine import EngineCoreRequest, RequestFinishedReason
+from vllm.v1.engine import EngineCoreRequest, FinishReason
 from vllm.v1.utils import ConstantList
 
 if TYPE_CHECKING:
@@ -109,7 +109,7 @@ def num_output_tokens(self) -> int:
     def is_finished(self) -> bool:
         return RequestStatus.is_finished(self.status)
 
-    def get_finished_reason(self) -> Union[RequestFinishedReason, None]:
+    def get_finished_reason(self) -> Union[FinishReason, None]:
         return RequestStatus.get_finished_reason(self.status)
 
     def has_encoder_inputs(self) -> bool:
@@ -150,7 +150,7 @@ def is_finished(status: "RequestStatus") -> bool:
 
     @staticmethod
     def get_finished_reason(
-            status: "RequestStatus") -> Union[RequestFinishedReason, None]:
+            status: "RequestStatus") -> Union[FinishReason, None]:
         return _FINISHED_REASON_MAP.get(status)
 
 
@@ -159,8 +159,8 @@ def get_finished_reason(
 # are longer than the model's length cap. Therefore, the stop
 # reason should also be "length" as in OpenAI API.
 _FINISHED_REASON_MAP = {
-    RequestStatus.FINISHED_STOPPED: RequestFinishedReason.STOP,
-    RequestStatus.FINISHED_LENGTH_CAPPED: RequestFinishedReason.LENGTH,
-    RequestStatus.FINISHED_ABORTED: RequestFinishedReason.ABORT,
-    RequestStatus.FINISHED_IGNORED: RequestFinishedReason.LENGTH,
+    RequestStatus.FINISHED_STOPPED: FinishReason.STOP,
+    RequestStatus.FINISHED_LENGTH_CAPPED: FinishReason.LENGTH,
+    RequestStatus.FINISHED_ABORTED: FinishReason.ABORT,
+    RequestStatus.FINISHED_IGNORED: FinishReason.LENGTH,
 }
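
_FINISHED_REASON_MAP is the single point where internal RequestStatus values are projected onto the external FinishReason; statuses that are not finished fall through dict.get and yield None. A standalone sketch of that lookup (this RequestStatus enum is abbreviated to the members shown in the diff and the real one has more states; FinishReason is assumed from the sketch above):

import enum
from typing import Optional

class RequestStatus(enum.Enum):
    RUNNING = enum.auto()
    FINISHED_STOPPED = enum.auto()
    FINISHED_LENGTH_CAPPED = enum.auto()
    FINISHED_ABORTED = enum.auto()
    FINISHED_IGNORED = enum.auto()

# Ignored requests exceeded the model's length cap, so they also
# report "length", matching the OpenAI API (as the original comment notes).
_FINISHED_REASON_MAP = {
    RequestStatus.FINISHED_STOPPED: FinishReason.STOP,
    RequestStatus.FINISHED_LENGTH_CAPPED: FinishReason.LENGTH,
    RequestStatus.FINISHED_ABORTED: FinishReason.ABORT,
    RequestStatus.FINISHED_IGNORED: FinishReason.LENGTH,
}

def get_finished_reason(status: RequestStatus) -> Optional[FinishReason]:
    # Non-finished statuses (e.g. RUNNING) are absent from the map -> None.
    return _FINISHED_REASON_MAP.get(status)

assert get_finished_reason(RequestStatus.RUNNING) is None
assert str(get_finished_reason(RequestStatus.FINISHED_IGNORED)) == "length"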
