diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 37134cfb3da..bad5d527325 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -2,12 +2,18 @@ import json import time +import sys from collections.abc import (AsyncGenerator, Iterable, Iterator, Mapping, Sequence) from concurrent.futures.thread import ThreadPoolExecutor from http import HTTPStatus from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional, - TypedDict, TypeVar, Union) + TypeVar, Union) + +if sys.version_info < (3, 12): + from typing_extensions import TypedDict +else: + from typing import TypedDict from fastapi import Request from pydantic import BaseModel, ConfigDict, Field