RFC: Benchmarking scenarios #99

Draft · wants to merge 8 commits into base: main
118 changes: 74 additions & 44 deletions src/guidellm/__main__.py
@@ -4,9 +4,12 @@
from typing import get_args

import click
from pydantic import ValidationError

from guidellm.backend import BackendType
from guidellm.benchmark import ProfileType, benchmark_generative_text
from guidellm.benchmark import ProfileType
from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario
from guidellm.config import print_config
from guidellm.scheduler import StrategyType

@@ -24,18 +27,17 @@ def parse_json(ctx, param, value): # noqa: ARG001
raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err


def parse_number_str(ctx, param, value): # noqa: ARG001
if value is None:
return None

values = value.split(",") if "," in value else [value]
def set_if_not_default(ctx: click.Context, **kwargs):
"""
Set the value of a click option if it is not the default value.
This is useful for setting options that are not None by default.
"""
values = {}
for k, v in kwargs.items():
if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT:
values[k] = v

try:
return [float(val) for val in values]
except ValueError as err:
raise click.BadParameter(
f"{param.name} must be a number or comma-separated list of numbers."
) from err
return values
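
Note for reviewers: `set_if_not_default` returns only the options the user actually typed, so values coming from a scenario file are not clobbered by click defaults. A minimal self-contained sketch of the same idea (the `demo` command and its options are illustrative, not part of this PR):

```python
import click


def set_if_not_default(ctx: click.Context, **kwargs) -> dict:
    """Keep only the kwargs whose values did not come from a click default."""
    return {
        k: v
        for k, v in kwargs.items()
        if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT
    }


@click.command()
@click.option("--rate", default=None, type=float)
@click.option("--random-seed", default=42, type=int)
def demo(rate, random_seed):
    ctx = click.get_current_context()
    # `demo --rate 1.5` prints {'rate': 1.5}; random_seed is dropped
    # because 42 came from the option default, not from the user.
    click.echo(set_if_not_default(ctx, rate=rate, random_seed=random_seed))


if __name__ == "__main__":
    demo()
```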


@click.group()
@@ -46,9 +48,14 @@ def cli():
@cli.command(
help="Run a benchmark against a generative model using the specified arguments."
)
@click.option(
"--scenario",
type=str,
default=None,
help=("TODO: A scenario or path to config"),
)
@click.option(
"--target",
required=True,
type=str,
help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
)
@@ -59,20 +66,20 @@ def cli():
"The type of backend to use to run requests against. Defaults to 'openai_http'."
f" Supported types: {', '.join(get_args(BackendType))}"
),
default="openai_http",
default=GenerativeTextScenario.model_fields["backend_type"].default,
)
@click.option(
"--backend-args",
callback=parse_json,
default=None,
default=GenerativeTextScenario.model_fields["backend_args"].default,
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs."
),
)
@click.option(
"--model",
default=None,
default=GenerativeTextScenario.model_fields["model"].default,
type=str,
help=(
"The ID of the model to benchmark within the backend. "
@@ -81,7 +88,7 @@
)
@click.option(
"--processor",
default=None,
default=GenerativeTextScenario.model_fields["processor"].default,
type=str,
help=(
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -91,7 +98,7 @@
)
@click.option(
"--processor-args",
default=None,
default=GenerativeTextScenario.model_fields["processor_args"].default,
callback=parse_json,
help=(
"A JSON string containing any arguments to pass to the processor constructor "
@@ -100,7 +107,6 @@
)
@click.option(
"--data",
required=True,
type=str,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -110,6 +116,7 @@
)
@click.option(
"--data-args",
default=GenerativeTextScenario.model_fields["data_args"].default,
callback=parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
@@ -118,7 +125,7 @@
)
@click.option(
"--data-sampler",
default=None,
default=GenerativeTextScenario.model_fields["data_sampler"].default,
type=click.Choice(["random"]),
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -127,7 +134,6 @@
)
@click.option(
"--rate-type",
required=True,
type=click.Choice(STRATEGY_PROFILE_CHOICES),
help=(
"The type of benchmark to run. "
@@ -136,8 +142,7 @@
)
@click.option(
"--rate",
default=None,
callback=parse_number_str,
default=GenerativeTextScenario.model_fields["rate"].default,
help=(
"The rates to run the benchmark at. "
"Can be a single number or a comma-separated list of numbers. "
@@ -150,6 +155,7 @@
@click.option(
"--max-seconds",
type=float,
default=GenerativeTextScenario.model_fields["max_seconds"].default,
help=(
"The maximum number of seconds each benchmark can run for. "
"If None, will run until max_requests or the data is exhausted."
@@ -158,6 +164,7 @@
@click.option(
"--max-requests",
type=int,
default=GenerativeTextScenario.model_fields["max_requests"].default,
help=(
"The maximum number of requests each benchmark can run for. "
"If None, will run until max_seconds or the data is exhausted."
@@ -166,7 +173,7 @@
@click.option(
"--warmup-percent",
type=float,
default=None,
default=GenerativeTextScenario.model_fields["warmup_percent"].default,
help=(
"The percent of the benchmark (based on max-seconds, max-requets, "
"or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -176,6 +183,7 @@
@click.option(
"--cooldown-percent",
type=float,
default=GenerativeTextScenario.model_fields["cooldown_percent"].default,
help=(
"The percent of the benchmark (based on max-seconds, max-requets, or lenth "
"of dataset) to run as a cooldown and not include in the final results. "
@@ -220,15 +228,16 @@ def cli():
"The number of samples to save in the output file. "
"If None (default), will save all samples."
),
default=None,
default=GenerativeTextScenario.model_fields["output_sampling"].default,
)
@click.option(
"--random-seed",
default=42,
default=GenerativeTextScenario.model_fields["random_seed"].default,
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
scenario,
target,
backend_type,
backend_args,
@@ -252,30 +261,51 @@ def benchmark(
output_sampling,
random_seed,
):
click_ctx = click.get_current_context()

overrides = set_if_not_default(
click_ctx,
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
output_sampling=output_sampling,
random_seed=random_seed,
)

try:
# Build the scenario from CLI args alone, or load it from a file if given
if scenario is None:
_scenario = GenerativeTextScenario.model_validate(overrides)
else:
# TODO: Support pre-defined scenarios
_scenario = GenerativeTextScenario.from_file(scenario, overrides)
except ValidationError as e:
errs = e.errors(include_url=False, include_context=True, include_input=True)
param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
raise click.BadParameter(
errs[0]["msg"], ctx=click_ctx, param_hint=param_name
) from e

asyncio.run(
benchmark_generative_text(
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
benchmark_with_scenario(
scenario=_scenario,
show_progress=not disable_progress,
show_progress_scheduler_stats=display_scheduler_stats,
output_console=not disable_console_outputs,
output_path=output_path,
output_extras=output_extras,
output_sampling=output_sampling,
random_seed=random_seed,
)
)
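
Note on the `GenerativeTextScenario.model_fields[...].default` pattern used for the options above: the pydantic model becomes the single source of truth for defaults, so the CLI and scenario files cannot drift apart. A small sketch with a hypothetical model:

```python
from typing import Optional

import click
from pydantic import BaseModel


class MiniScenario(BaseModel):
    backend_type: str = "openai_http"
    random_seed: int = 42
    max_seconds: Optional[float] = None


@click.command()
@click.option(
    "--backend-type",
    # Read the default from the model so help text and validation agree.
    default=MiniScenario.model_fields["backend_type"].default,
)
@click.option(
    "--random-seed",
    type=int,
    default=MiniScenario.model_fields["random_seed"].default,
)
def run(backend_type, random_seed):
    click.echo(f"{backend_type=} {random_seed=}")
```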

14 changes: 14 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
@@ -15,9 +15,23 @@
)
from guidellm.benchmark.profile import ProfileType, create_profile
from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
from guidellm.request import GenerativeRequestLoader
from guidellm.scheduler import StrategyType

type benchmark_type = Literal["generative_text"]


async def benchmark_with_scenario(scenario: Scenario, **kwargs):
"""
Run a benchmark using a scenario and specify any extra arguments
"""

if isinstance(scenario, GenerativeTextScenario):
return await benchmark_generative_text(**vars(scenario), **kwargs)
else:
raise ValueError(f"Unsupported Scenario type {type(scenario)}")
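
A hypothetical usage sketch of the new entrypoint; the target URL, data spec, and rate type below are illustrative values, not fixtures from this PR:

```python
import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

scenario = GenerativeTextScenario(
    target="http://localhost:8000",
    data="prompt_tokens=256,output_tokens=128",  # illustrative dataset spec
    rate_type="synchronous",
)

# Extra kwargs are forwarded to benchmark_generative_text unchanged.
asyncio.run(benchmark_with_scenario(scenario, show_progress=True))
```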


async def benchmark_generative_text(
target: str,
92 changes: 92 additions & 0 deletions src/guidellm/benchmark/scenario.py
@@ -0,0 +1,92 @@
import json
from collections.abc import Iterable
from pathlib import Path
from typing import Annotated, Any, Literal, Optional, TypeVar, Union

import yaml
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
from loguru import logger
from pydantic import BeforeValidator, NonNegativeInt, PositiveFloat, PositiveInt
from transformers.tokenization_utils_base import ( # type: ignore[import]
PreTrainedTokenizerBase,
)

from guidellm.backend.backend import BackendType
from guidellm.benchmark.profile import ProfileType
from guidellm.objects.pydantic import StandardBaseModel
from guidellm.scheduler.strategy import StrategyType

__all__ = ["Scenario", "GenerativeTextScenario"]


def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
if isinstance(value, (int, float)):
return [value]
elif isinstance(value, list):
return value

values = value.split(",") if "," in value else [value]

try:
return [float(val) for val in values]
except ValueError as err:
raise ValueError(
"must be a number or comma-separated list of numbers."
) from err
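
Expected behaviour of the validator, as a quick sketch:

```python
# Strings, scalars, and lists all normalize to a list of floats:
assert parse_float_list("1.5") == [1.5]
assert parse_float_list("1,2,4") == [1.0, 2.0, 4.0]
assert parse_float_list(8) == [8.0]
assert parse_float_list([0.5, 2.0]) == [0.5, 2.0]
```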


T = TypeVar("T", bound="Scenario")


class Scenario(StandardBaseModel):
target: str

@classmethod
def from_file(
cls: type[T], filename: Union[str, Path], overrides: Optional[dict] = None
) -> T:
try:
with open(filename) as f:
if str(filename).endswith(".yaml") or str(filename).endswith(".yml"):
data = yaml.safe_load(f)
else: # Assume everything else is json
data = json.load(f)
except (json.JSONDecodeError, yaml.YAMLError) as e:
logger.error("Failed to parse scenario")
raise

data.update(overrides or {})
return cls.model_validate(data)
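
A hypothetical round-trip through `from_file`, with an override taking precedence over the file value (the file contents, including the synthetic-data spec for `data`, are illustrative):

```python
from pathlib import Path

import yaml

from guidellm.benchmark.scenario import GenerativeTextScenario

doc = {
    "target": "http://localhost:8000",
    "data": "prompt_tokens=256,output_tokens=128",
    "rate_type": "constant",
    "rate": "1,2,4",  # the BeforeValidator below expands this to [1.0, 2.0, 4.0]
}
path = Path("scenario.yaml")
path.write_text(yaml.safe_dump(doc))

# Overrides (e.g. from the CLI) win over values in the file.
scenario = GenerativeTextScenario.from_file(path, {"max_seconds": 60})
assert scenario.rate == [1.0, 2.0, 4.0]
assert scenario.max_seconds == 60
```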


class GenerativeTextScenario(Scenario):
# FIXME: This solves an issue with Pydantic and class types
class Config:
arbitrary_types_allowed = True

backend_type: BackendType = "openai_http"
backend_args: Optional[dict[str, Any]] = None
model: Optional[str] = None
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
processor_args: Optional[dict[str, Any]] = None
data: Union[
str,
Path,
Iterable[Union[str, dict[str, Any]]],
Dataset,
DatasetDict,
IterableDataset,
IterableDatasetDict,
]
data_args: Optional[dict[str, Any]] = None
data_sampler: Optional[Literal["random"]] = None
rate_type: Union[StrategyType, ProfileType]
rate: Annotated[
Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
] = None
max_seconds: Optional[PositiveFloat] = None
max_requests: Optional[PositiveInt] = None
warmup_percent: Optional[PositiveFloat] = None
cooldown_percent: Optional[PositiveFloat] = None
output_sampling: Optional[NonNegativeInt] = None
random_seed: int = 42
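
Aside on the FIXME above: the nested `class Config` still works in pydantic v2, but the same setting is usually spelled with `ConfigDict`; a sketch of the equivalent, not part of this PR:

```python
from pydantic import BaseModel, ConfigDict


class GenerativeTextScenarioV2(BaseModel):
    # Equivalent to `class Config: arbitrary_types_allowed = True`.
    model_config = ConfigDict(arbitrary_types_allowed=True)
```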