@@ -6,7 +6,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import RequestMetrics
-from vllm.v1.engine import EngineCoreRequest, RequestFinishedReason
+from vllm.v1.engine import EngineCoreRequest, FinishReason
 from vllm.v1.utils import ConstantList
 
 if TYPE_CHECKING:
@@ -109,7 +109,7 @@ def num_output_tokens(self) -> int:
     def is_finished(self) -> bool:
         return RequestStatus.is_finished(self.status)
 
-    def get_finished_reason(self) -> Union[RequestFinishedReason, None]:
+    def get_finished_reason(self) -> Union[FinishReason, None]:
         return RequestStatus.get_finished_reason(self.status)
 
     def has_encoder_inputs(self) -> bool:
@@ -150,7 +150,7 @@ def is_finished(status: "RequestStatus") -> bool:
 
     @staticmethod
     def get_finished_reason(
-            status: "RequestStatus") -> Union[RequestFinishedReason, None]:
+            status: "RequestStatus") -> Union[FinishReason, None]:
         return _FINISHED_REASON_MAP.get(status)
 
 
@@ -159,8 +159,8 @@ def get_finished_reason(
 # are longer than the model's length cap. Therefore, the stop
 # reason should also be "length" as in OpenAI API.
 _FINISHED_REASON_MAP = {
-    RequestStatus.FINISHED_STOPPED: RequestFinishedReason.STOP,
-    RequestStatus.FINISHED_LENGTH_CAPPED: RequestFinishedReason.LENGTH,
-    RequestStatus.FINISHED_ABORTED: RequestFinishedReason.ABORT,
-    RequestStatus.FINISHED_IGNORED: RequestFinishedReason.LENGTH,
+    RequestStatus.FINISHED_STOPPED: FinishReason.STOP,
+    RequestStatus.FINISHED_LENGTH_CAPPED: FinishReason.LENGTH,
+    RequestStatus.FINISHED_ABORTED: FinishReason.ABORT,
+    RequestStatus.FINISHED_IGNORED: FinishReason.LENGTH,
 }
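
For context, here is a self-contained sketch of how the renamed enum and the status-to-reason map fit together. The `RequestStatus` members, their integer values, and the `FinishReason` values below are assumptions for illustration only; the real definitions live in `vllm/v1/engine` and `vllm/v1/request`.

```python
import enum
from typing import Union


# Assumed stand-in for FinishReason from vllm/v1/engine (renamed from
# RequestFinishedReason in this diff); member values are illustrative.
class FinishReason(enum.IntEnum):
    STOP = 0
    LENGTH = 1
    ABORT = 2


# Assumed stand-in for RequestStatus; only the ordering relative to
# PREEMPTED matters for is_finished below.
class RequestStatus(enum.IntEnum):
    WAITING = 0
    RUNNING = 1
    PREEMPTED = 2
    FINISHED_STOPPED = 3
    FINISHED_LENGTH_CAPPED = 4
    FINISHED_ABORTED = 5
    FINISHED_IGNORED = 6

    @staticmethod
    def is_finished(status: "RequestStatus") -> bool:
        # All FINISHED_* members sort after PREEMPTED.
        return status > RequestStatus.PREEMPTED

    @staticmethod
    def get_finished_reason(
            status: "RequestStatus") -> Union[FinishReason, None]:
        # Non-finished statuses are absent from the map and yield None.
        return _FINISHED_REASON_MAP.get(status)


# Mirrors the mapping in the diff: ignored requests report "length",
# matching the OpenAI API convention for over-long prompts.
_FINISHED_REASON_MAP = {
    RequestStatus.FINISHED_STOPPED: FinishReason.STOP,
    RequestStatus.FINISHED_LENGTH_CAPPED: FinishReason.LENGTH,
    RequestStatus.FINISHED_ABORTED: FinishReason.ABORT,
    RequestStatus.FINISHED_IGNORED: FinishReason.LENGTH,
}

print(RequestStatus.get_finished_reason(RequestStatus.FINISHED_IGNORED))
# FinishReason.LENGTH
print(RequestStatus.get_finished_reason(RequestStatus.RUNNING))
# None
```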