diff --git a/.gitignore b/.gitignore
index d4186ed6..2010f626 100644
--- a/.gitignore
+++ b/.gitignore
@@ -178,3 +178,7 @@ cython_debug/
 # Project specific files
 *.json
 *.yaml
+
+# But not scenarios
+!src/guidellm/benchmark/scenarios/*.json
+!src/guidellm/benchmark/scenarios/*.yaml
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index d81b7ddf..4b059655 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -1,43 +1,23 @@
 import asyncio
-import json
 from pathlib import Path
 from typing import get_args
 
 import click
+from pydantic import ValidationError
 
 from guidellm.backend import BackendType
-from guidellm.benchmark import ProfileType, benchmark_generative_text
+from guidellm.benchmark import ProfileType
+from guidellm.benchmark.entrypoints import benchmark_with_scenario
+from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
 from guidellm.config import print_config
 from guidellm.scheduler import StrategyType
+from guidellm.utils import cli as cli_tools
 
 STRATEGY_PROFILE_CHOICES = set(
     list(get_args(ProfileType)) + list(get_args(StrategyType))
 )
 
 
-def parse_json(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-    try:
-        return json.loads(value)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
-
-
-def parse_number_str(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-
-    values = value.split(",") if "," in value else [value]
-
-    try:
-        return [float(val) for val in values]
-    except ValueError as err:
-        raise click.BadParameter(
-            f"{param.name} must be a number or comma-separated list of numbers."
-        ) from err
-
-
 @click.group()
 def cli():
     pass
@@ -46,9 +26,27 @@ def cli():
 @cli.command(
     help="Run a benchmark against a generative model using the specified arguments."
 )
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,  # type: ignore[type-var]
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the command line will override the scenario."
+    ),
+)
 @click.option(
     "--target",
-    required=True,
     type=str,
     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
 )
@@ -59,12 +57,12 @@ def cli():
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
         f" Supported types: {', '.join(get_args(BackendType))}"
     ),
-    default="openai_http",
+    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
     "--backend-args",
-    callback=parse_json,
-    default=None,
+    callback=cli_tools.parse_json,
+    default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
         "dict with **kwargs."
@@ -72,7 +70,7 @@ def cli():
 )
 @click.option(
     "--model",
-    default=None,
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -81,7 +79,7 @@
 )
 @click.option(
     "--processor",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -91,8 +89,8 @@ def cli():
 )
 @click.option(
     "--processor-args",
-    default=None,
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("processor_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -100,7 +98,6 @@ def cli():
 )
 @click.option(
     "--data",
-    required=True,
     type=str,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -110,7 +107,8 @@ def cli():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("data_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
@@ -118,7 +116,7 @@ def cli():
 )
 @click.option(
     "--data-sampler",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -127,7 +125,6 @@ def cli():
 )
 @click.option(
     "--rate-type",
-    required=True,
     type=click.Choice(STRATEGY_PROFILE_CHOICES),
     help=(
         "The type of benchmark to run. "
@@ -136,8 +133,7 @@ def cli():
 )
 @click.option(
     "--rate",
-    default=None,
-    callback=parse_number_str,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
@@ -150,6 +146,7 @@ def cli():
 @click.option(
     "--max-seconds",
     type=float,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -158,6 +155,7 @@ def cli():
 @click.option(
     "--max-requests",
     type=int,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
@@ -166,7 +164,7 @@ def cli():
 @click.option(
     "--warmup-percent",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("warmup_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, "
         "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -176,6 +174,7 @@ def cli():
 @click.option(
     "--cooldown-percent",
     type=float,
+    default=GenerativeTextScenario.get_default("cooldown_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
         "of dataset) to run as a cooldown and not include in the final results. "
@@ -210,7 +209,7 @@ def cli():
 )
 @click.option(
     "--output-extras",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help="A JSON string of extra data to save with the output benchmarks",
 )
 @click.option(
@@ -220,15 +219,16 @@ def cli():
         "The number of samples to save in the output file. "
         "If None (default), will save all samples."
     ),
-    default=None,
+    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
     "--random-seed",
-    default=42,
+    default=GenerativeTextScenario.get_default("random_seed"),
     type=int,
     help="The random seed to use for benchmarking to ensure reproducibility.",
 )
 def benchmark(
+    scenario,
     target,
     backend_type,
     backend_args,
@@ -252,30 +252,53 @@ def benchmark(
     output_sampling,
     random_seed,
 ):
+    click_ctx = click.get_current_context()
+
+    overrides = cli_tools.set_if_not_default(
+        click_ctx,
+        target=target,
+        backend_type=backend_type,
+        backend_args=backend_args,
+        model=model,
+        processor=processor,
+        processor_args=processor_args,
+        data=data,
+        data_args=data_args,
+        data_sampler=data_sampler,
+        rate_type=rate_type,
+        rate=rate,
+        max_seconds=max_seconds,
+        max_requests=max_requests,
+        warmup_percent=warmup_percent,
+        cooldown_percent=cooldown_percent,
+        output_sampling=output_sampling,
+        random_seed=random_seed,
+    )
+
+    try:
+        # If a scenario file was specified, read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic validation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     asyncio.run(
-        benchmark_generative_text(
-            target=target,
-            backend_type=backend_type,
-            backend_args=backend_args,
-            model=model,
-            processor=processor,
-            processor_args=processor_args,
-            data=data,
-            data_args=data_args,
-            data_sampler=data_sampler,
-            rate_type=rate_type,
-            rate=rate,
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            warmup_percent=warmup_percent,
-            cooldown_percent=cooldown_percent,
+        benchmark_with_scenario(
+            scenario=_scenario,
             show_progress=not disable_progress,
            show_progress_scheduler_stats=display_scheduler_stats,
             output_console=not disable_console_outputs,
             output_path=output_path,
             output_extras=output_extras,
-            output_sampling=output_sampling,
-            random_seed=random_seed,
         )
     )
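A note on the new `try/except` in `benchmark`: pydantic validation failures are re-raised as `click.BadParameter` pointing at the offending flag. The sketch below is illustrative only (not part of the patch) and uses the same `errors()` call the hunk relies on:

```python
# Illustrative sketch (not part of the patch): how the except block above maps
# a pydantic error onto the offending CLI flag.
from pydantic import ValidationError

from guidellm.benchmark.scenario import GenerativeTextScenario

try:
    # No scenario file and no --data / --rate-type given on the command line
    GenerativeTextScenario.model_validate({"target": "http://localhost:8000"})
except ValidationError as e:
    errs = e.errors(include_url=False)
    print("--" + str(errs[0]["loc"][0]).replace("_", "-"), "-", errs[0]["msg"])
    # e.g. "--data - Field required"
```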
" @@ -81,7 +79,7 @@ def cli(): ) @click.option( "--processor", - default=None, + default=GenerativeTextScenario.get_default("processor"), type=str, help=( "The processor or tokenizer to use to calculate token counts for statistics " @@ -91,8 +89,8 @@ def cli(): ) @click.option( "--processor-args", - default=None, - callback=parse_json, + default=GenerativeTextScenario.get_default("processor_args"), + callback=cli_tools.parse_json, help=( "A JSON string containing any arguments to pass to the processor constructor " "as a dict with **kwargs." @@ -100,7 +98,6 @@ def cli(): ) @click.option( "--data", - required=True, type=str, help=( "The HuggingFace dataset ID, a path to a HuggingFace dataset, " @@ -110,7 +107,8 @@ def cli(): ) @click.option( "--data-args", - callback=parse_json, + default=GenerativeTextScenario.get_default("data_args"), + callback=cli_tools.parse_json, help=( "A JSON string containing any arguments to pass to the dataset creation " "as a dict with **kwargs." @@ -118,7 +116,7 @@ def cli(): ) @click.option( "--data-sampler", - default=None, + default=GenerativeTextScenario.get_default("data_sampler"), type=click.Choice(["random"]), help=( "The data sampler type to use. 'random' will add a random shuffle on the data. " @@ -127,7 +125,6 @@ def cli(): ) @click.option( "--rate-type", - required=True, type=click.Choice(STRATEGY_PROFILE_CHOICES), help=( "The type of benchmark to run. " @@ -136,8 +133,7 @@ def cli(): ) @click.option( "--rate", - default=None, - callback=parse_number_str, + default=GenerativeTextScenario.get_default("rate"), help=( "The rates to run the benchmark at. " "Can be a single number or a comma-separated list of numbers. " @@ -150,6 +146,7 @@ def cli(): @click.option( "--max-seconds", type=float, + default=GenerativeTextScenario.get_default("max_seconds"), help=( "The maximum number of seconds each benchmark can run for. " "If None, will run until max_requests or the data is exhausted." @@ -158,6 +155,7 @@ def cli(): @click.option( "--max-requests", type=int, + default=GenerativeTextScenario.get_default("max_requests"), help=( "The maximum number of requests each benchmark can run for. " "If None, will run until max_seconds or the data is exhausted." @@ -166,7 +164,7 @@ def cli(): @click.option( "--warmup-percent", type=float, - default=None, + default=GenerativeTextScenario.get_default("warmup_percent"), help=( "The percent of the benchmark (based on max-seconds, max-requets, " "or lenth of dataset) to run as a warmup and not include in the final results. " @@ -176,6 +174,7 @@ def cli(): @click.option( "--cooldown-percent", type=float, + default=GenerativeTextScenario.get_default("cooldown_percent"), help=( "The percent of the benchmark (based on max-seconds, max-requets, or lenth " "of dataset) to run as a cooldown and not include in the final results. " @@ -210,7 +209,7 @@ def cli(): ) @click.option( "--output-extras", - callback=parse_json, + callback=cli_tools.parse_json, help="A JSON string of extra data to save with the output benchmarks", ) @click.option( @@ -220,15 +219,16 @@ def cli(): "The number of samples to save in the output file. " "If None (default), will save all samples." 
diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py
new file mode 100644
index 00000000..af43e426
--- /dev/null
+++ b/src/guidellm/benchmark/scenario.py
@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__all__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma-separated string into a list of floats,
+    wrap a single number in a one-element list, or pass a
+    float list through unchanged.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
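The scenario model also absorbs the old `parse_number_str` behaviour via a `BeforeValidator`, and range checks move into field constraints. A short sketch of the resulting coercion and validation, with made-up values:

```python
# Sketch with made-up values: rate strings are coerced to list[float], and
# out-of-range percents are rejected by the field constraints.
from pydantic import ValidationError

from guidellm.benchmark.scenario import GenerativeTextScenario

s = GenerativeTextScenario(
    target="http://localhost:8000",
    data="data.jsonl",
    rate_type="constant",
    rate="1,2.5,10",  # parse_float_list runs before validation
)
assert s.rate == [1.0, 2.5, 10.0]

try:
    GenerativeTextScenario(
        target="http://localhost:8000",
        data="data.jsonl",
        rate_type="sweep",
        warmup_percent=1.5,  # violates Field(gt=0, le=1)
    )
except ValidationError as e:
    print(e.errors(include_url=False)[0]["loc"])  # ('warmup_percent',)
```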
diff --git a/src/guidellm/benchmark/scenarios/__init__.py b/src/guidellm/benchmark/scenarios/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/guidellm/benchmark/scenarios/chat.json b/src/guidellm/benchmark/scenarios/chat.json
new file mode 100644
index 00000000..024438c5
--- /dev/null
+++ b/src/guidellm/benchmark/scenarios/chat.json
@@ -0,0 +1,13 @@
+{
+  "rate_type": "sweep",
+  "data": {
+    "prompt_tokens": 512,
+    "prompt_tokens_stdev": 128,
+    "prompt_tokens_min": 1,
+    "prompt_tokens_max": 1024,
+    "output_tokens": 256,
+    "output_tokens_stdev": 64,
+    "output_tokens_min": 1,
+    "output_tokens_max": 1024
+  }
+}
diff --git a/src/guidellm/benchmark/scenarios/rag.json b/src/guidellm/benchmark/scenarios/rag.json
new file mode 100644
index 00000000..c7ee2f27
--- /dev/null
+++ b/src/guidellm/benchmark/scenarios/rag.json
@@ -0,0 +1,13 @@
+{
+  "rate_type": "sweep",
+  "data": {
+    "prompt_tokens": 4096,
+    "prompt_tokens_stdev": 512,
+    "prompt_tokens_min": 2048,
+    "prompt_tokens_max": 6144,
+    "output_tokens": 512,
+    "output_tokens_stdev": 128,
+    "output_tokens_min": 1,
+    "output_tokens_max": 1024
+  }
+}
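The two JSON files above are what `get_builtin_scenarios()` discovers; they only pin `rate_type` and the synthetic data profile, so `target` (and anything else) still comes from CLI overrides. A small usage sketch:

```python
# Small usage sketch: load a builtin scenario and layer overrides on top.
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios

print(sorted(get_builtin_scenarios()))  # ['chat', 'rag']

rag = GenerativeTextScenario.from_builtin("rag", {"target": "http://localhost:8000"})
assert rag.rate_type == "sweep"
assert rag.target == "http://localhost:8000"
```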
+ """ + + class Config: + # NOTE: This prevents errors due to unvalidatable + # types like PreTrainedTokenizerBase + arbitrary_types_allowed = True + + backend_type: BackendType = "openai_http" + backend_args: Optional[dict[str, Any]] = None + model: Optional[str] = None + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None + processor_args: Optional[dict[str, Any]] = None + data: Union[ + str, + Path, + Iterable[Union[str, dict[str, Any]]], + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + ] + data_args: Optional[dict[str, Any]] = None + data_sampler: Optional[Literal["random"]] = None + rate_type: Union[StrategyType, ProfileType] + rate: Annotated[ + Optional[list[PositiveFloat]], BeforeValidator(parse_float_list) + ] = None + max_seconds: Optional[PositiveFloat] = None + max_requests: Optional[PositiveInt] = None + warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None + cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None + output_sampling: Optional[NonNegativeInt] = None + random_seed: int = 42 diff --git a/src/guidellm/benchmark/scenarios/__init__.py b/src/guidellm/benchmark/scenarios/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/guidellm/benchmark/scenarios/chat.json b/src/guidellm/benchmark/scenarios/chat.json new file mode 100644 index 00000000..024438c5 --- /dev/null +++ b/src/guidellm/benchmark/scenarios/chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/guidellm/benchmark/scenarios/rag.json b/src/guidellm/benchmark/scenarios/rag.json new file mode 100644 index 00000000..c7ee2f27 --- /dev/null +++ b/src/guidellm/benchmark/scenarios/rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 4096, + "prompt_tokens_stdev": 512, + "prompt_tokens_min": 2048, + "prompt_tokens_max": 6144, + "output_tokens": 512, + "output_tokens_stdev": 128, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/guidellm/objects/pydantic.py b/src/guidellm/objects/pydantic.py index 8365be33..f4b1e2da 100644 --- a/src/guidellm/objects/pydantic.py +++ b/src/guidellm/objects/pydantic.py @@ -1,10 +1,15 @@ -from typing import Any, Generic, TypeVar +import json +from pathlib import Path +from typing import Any, Generic, Optional, TypeVar +import yaml from loguru import logger from pydantic import BaseModel, ConfigDict, Field __all__ = ["StandardBaseModel", "StatusBreakdown"] +T = TypeVar("T", bound="StandardBaseModel") + class StandardBaseModel(BaseModel): """ @@ -27,6 +32,30 @@ def __init__(self, /, **data: Any) -> None: data, ) + @classmethod + def get_default(cls: type[T], field: str) -> Any: + """Get default values for model fields""" + return cls.model_fields[field].default + + @classmethod + def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T: + """ + Attempt to create a new instance of the model using + data loaded from json or yaml file. 
+ """ + try: + with filename.open() as f: + if str(filename).endswith((".yaml", ".yml")): + data = yaml.safe_load(f) + else: # Assume everything else is json + data = json.load(f) + except (json.JSONDecodeError, yaml.YAMLError) as e: + logger.error(f"Failed to parse {filename} as type {cls.__name__}") + raise ValueError(f"Error when parsing file: {filename}") from e + + data.update(overrides) + return cls.model_validate(data) + SuccessfulT = TypeVar("SuccessfulT") ErroredT = TypeVar("ErroredT") diff --git a/src/guidellm/utils/cli.py b/src/guidellm/utils/cli.py new file mode 100644 index 00000000..9af6359b --- /dev/null +++ b/src/guidellm/utils/cli.py @@ -0,0 +1,63 @@ +import json +from typing import Any + +import click + + +def parse_json(ctx, param, value): # noqa: ARG001 + if value is None: + return None + try: + return json.loads(value) + except json.JSONDecodeError as err: + raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err + + +def set_if_not_default(ctx: click.Context, **kwargs) -> dict[str, Any]: + """ + Set the value of a click option if it is not the default value. + This is useful for setting options that are not None by default. + """ + values = {} + for k, v in kwargs.items(): + if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT: # type: ignore[attr-defined] + values[k] = v + + return values + + +class Union(click.ParamType): + """ + A custom click parameter type that allows for multiple types to be accepted. + """ + + def __init__(self, *types: click.ParamType): + self.types = types + self.name = "".join(t.name for t in types) + + def convert(self, value, param, ctx): + fails = [] + for t in self.types: + try: + return t.convert(value, param, ctx) + except click.BadParameter as e: + fails.append(str(e)) + continue + + self.fail("; ".join(fails) or f"Invalid value: {value}") # noqa: RET503 + + + def get_metavar(self, param: click.Parameter) -> str: + def get_choices(t: click.ParamType) -> str: + meta = t.get_metavar(param) + return meta if meta is not None else t.name + + # Get the choices for each type in the union. + choices_str = "|".join(map(get_choices, self.types)) + + # Use curly braces to indicate a required argument. + if param.required and param.param_type_name == "argument": + return f"{{{choices_str}}}" + + # Use square braces to indicate an option or optional argument. + return f"[{choices_str}]"