RFC: Benchmarking scenarios #99

Draft · wants to merge 8 commits into base: main
118 changes: 74 additions & 44 deletions src/guidellm/__main__.py
@@ -4,9 +4,12 @@
from typing import get_args

import click
from pydantic import ValidationError

from guidellm.backend import BackendType
from guidellm.benchmark import ProfileType, benchmark_generative_text
from guidellm.benchmark import ProfileType
from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario
from guidellm.config import print_config
from guidellm.scheduler import StrategyType

@@ -24,18 +27,17 @@ def parse_json(ctx, param, value): # noqa: ARG001
raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err


def parse_number_str(ctx, param, value): # noqa: ARG001
if value is None:
return None

values = value.split(",") if "," in value else [value]
def set_if_not_default(ctx: click.Context, **kwargs):
"""
Set the value of a click option if it is not the default value.
This is useful for setting options that are not None by default.
"""
values = {}
for k, v in kwargs.items():
if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT:
values[k] = v

try:
return [float(val) for val in values]
except ValueError as err:
raise click.BadParameter(
f"{param.name} must be a number or comma-separated list of numbers."
) from err
return values
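
Note for reviewers: `set_if_not_default` returns only the options the user actually typed, so values coming from a scenario file are not clobbered by click defaults. A minimal self-contained sketch of the same idea (the `demo` command and its options are illustrative, not part of this PR):

```python
import click


def set_if_not_default(ctx: click.Context, **kwargs) -> dict:
    """Keep only the kwargs whose values did not come from a click default."""
    return {
        k: v
        for k, v in kwargs.items()
        if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT
    }


@click.command()
@click.option("--rate", default=None, type=float)
@click.option("--random-seed", default=42, type=int)
def demo(rate, random_seed):
    ctx = click.get_current_context()
    # `demo --rate 1.5` prints {'rate': 1.5}; random_seed is dropped
    # because 42 came from the option default, not from the user.
    click.echo(set_if_not_default(ctx, rate=rate, random_seed=random_seed))


if __name__ == "__main__":
    demo()
```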


@click.group()
@@ -46,9 +48,14 @@ def cli():
@cli.command(
help="Run a benchmark against a generative model using the specified arguments."
)
@click.option(
"--scenario",
type=str,
default=None,
help=("TODO: A scenario or path to config"),
)
@click.option(
"--target",
required=True,
type=str,
help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
)
@@ -59,20 +66,20 @@ def cli():
"The type of backend to use to run requests against. Defaults to 'openai_http'."
f" Supported types: {', '.join(get_args(BackendType))}"
),
default="openai_http",
default=GenerativeTextScenario.model_fields["backend_type"].default,
)
@click.option(
"--backend-args",
callback=parse_json,
default=None,
default=GenerativeTextScenario.model_fields["backend_args"].default,
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs."
),
)
@click.option(
"--model",
default=None,
default=GenerativeTextScenario.model_fields["model"].default,
type=str,
help=(
"The ID of the model to benchmark within the backend. "
@@ -81,7 +88,7 @@
)
@click.option(
"--processor",
default=None,
default=GenerativeTextScenario.model_fields["processor"].default,
type=str,
help=(
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -91,7 +98,7 @@
)
@click.option(
"--processor-args",
default=None,
default=GenerativeTextScenario.model_fields["processor_args"].default,
callback=parse_json,
help=(
"A JSON string containing any arguments to pass to the processor constructor "
@@ -100,7 +107,6 @@
)
@click.option(
"--data",
required=True,
type=str,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -110,6 +116,7 @@
)
@click.option(
"--data-args",
default=GenerativeTextScenario.model_fields["data_args"].default,
callback=parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
@@ -118,7 +125,7 @@
)
@click.option(
"--data-sampler",
default=None,
default=GenerativeTextScenario.model_fields["data_sampler"].default,
type=click.Choice(["random"]),
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -127,7 +134,6 @@
)
@click.option(
"--rate-type",
required=True,
type=click.Choice(STRATEGY_PROFILE_CHOICES),
help=(
"The type of benchmark to run. "
@@ -136,8 +142,7 @@
)
@click.option(
"--rate",
default=None,
callback=parse_number_str,
default=GenerativeTextScenario.model_fields["rate"].default,
help=(
"The rates to run the benchmark at. "
"Can be a single number or a comma-separated list of numbers. "
@@ -150,6 +155,7 @@
@click.option(
"--max-seconds",
type=float,
default=GenerativeTextScenario.model_fields["max_seconds"].default,
help=(
"The maximum number of seconds each benchmark can run for. "
"If None, will run until max_requests or the data is exhausted."
@@ -158,6 +164,7 @@
@click.option(
"--max-requests",
type=int,
default=GenerativeTextScenario.model_fields["max_requests"].default,
help=(
"The maximum number of requests each benchmark can run for. "
"If None, will run until max_seconds or the data is exhausted."
@@ -166,7 +173,7 @@
@click.option(
"--warmup-percent",
type=float,
default=None,
default=GenerativeTextScenario.model_fields["warmup_percent"].default,
help=(
"The percent of the benchmark (based on max-seconds, max-requets, "
"or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -176,6 +183,7 @@
@click.option(
"--cooldown-percent",
type=float,
default=GenerativeTextScenario.model_fields["cooldown_percent"].default,
help=(
"The percent of the benchmark (based on max-seconds, max-requets, or lenth "
"of dataset) to run as a cooldown and not include in the final results. "
@@ -220,15 +228,16 @@ def cli():
"The number of samples to save in the output file. "
"If None (default), will save all samples."
),
default=None,
default=GenerativeTextScenario.model_fields["output_sampling"].default,
)
@click.option(
"--random-seed",
default=42,
default=GenerativeTextScenario.model_fields["random_seed"].default,
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
scenario,
target,
backend_type,
backend_args,
@@ -252,30 +261,51 @@ def benchmark(
output_sampling,
random_seed,
):
click_ctx = click.get_current_context()

overrides = set_if_not_default(
click_ctx,
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
output_sampling=output_sampling,
random_seed=random_seed,
)

try:
# Build the scenario from CLI args alone, or load it from a file if given
if scenario is None:
_scenario = GenerativeTextScenario.model_validate(overrides)
else:
# TODO: Support pre-defined scenarios
_scenario = GenerativeTextScenario.from_file(scenario, overrides)
except ValidationError as e:
errs = e.errors(include_url=False, include_context=True, include_input=True)
param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
raise click.BadParameter(
errs[0]["msg"], ctx=click_ctx, param_hint=param_name
) from e

asyncio.run(
benchmark_generative_text(
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
benchmark_with_scenario(
scenario=_scenario,
show_progress=not disable_progress,
show_progress_scheduler_stats=display_scheduler_stats,
output_console=not disable_console_outputs,
output_path=output_path,
output_extras=output_extras,
output_sampling=output_sampling,
random_seed=random_seed,
)
)
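
Note on the `GenerativeTextScenario.model_fields[...].default` pattern used for the options above: the pydantic model becomes the single source of truth for defaults, so the CLI and scenario files cannot drift apart. A small sketch with a hypothetical model:

```python
from typing import Optional

import click
from pydantic import BaseModel


class MiniScenario(BaseModel):
    backend_type: str = "openai_http"
    random_seed: int = 42
    max_seconds: Optional[float] = None


@click.command()
@click.option(
    "--backend-type",
    # Read the default from the model so help text and validation agree.
    default=MiniScenario.model_fields["backend_type"].default,
)
@click.option(
    "--random-seed",
    type=int,
    default=MiniScenario.model_fields["random_seed"].default,
)
def run(backend_type, random_seed):
    click.echo(f"{backend_type=} {random_seed=}")
```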

14 changes: 14 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
@@ -15,9 +15,23 @@
)
from guidellm.benchmark.profile import ProfileType, create_profile
from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
from guidellm.request import GenerativeRequestLoader
from guidellm.scheduler import StrategyType

type benchmark_type = Literal["generative_text"]


async def benchmark_with_scenario(scenario: Scenario, **kwargs):
"""
Run a benchmark using a scenario and specify any extra arguments
"""

if isinstance(scenario, GenerativeTextScenario):
return await benchmark_generative_text(**vars(scenario), **kwargs)
else:
raise ValueError(f"Unsupported Scenario type {type(scenario)}")
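
A hypothetical usage sketch of the new entrypoint; the target URL, data spec, and rate type below are illustrative values, not fixtures from this PR:

```python
import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

scenario = GenerativeTextScenario(
    target="http://localhost:8000",
    data="prompt_tokens=256,output_tokens=128",  # illustrative dataset spec
    rate_type="synchronous",
)

# Extra kwargs are forwarded to benchmark_generative_text unchanged.
asyncio.run(benchmark_with_scenario(scenario, show_progress=True))
```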


async def benchmark_generative_text(
target: str,
92 changes: 92 additions & 0 deletions src/guidellm/benchmark/scenario.py
@@ -0,0 +1,92 @@
import json
from collections.abc import Iterable
from pathlib import Path
from typing import Annotated, Any, Literal, Optional, TypeVar, Union

import yaml
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
from loguru import logger
from pydantic import BeforeValidator, NonNegativeInt, PositiveFloat, PositiveInt
from transformers.tokenization_utils_base import ( # type: ignore[import]
PreTrainedTokenizerBase,
)

from guidellm.backend.backend import BackendType
from guidellm.benchmark.profile import ProfileType
from guidellm.objects.pydantic import StandardBaseModel
from guidellm.scheduler.strategy import StrategyType

__all__ = ["Scenario", "GenerativeTextScenario"]


def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
if isinstance(value, (int, float)):
return [value]
elif isinstance(value, list):
return value

values = value.split(",") if "," in value else [value]

try:
return [float(val) for val in values]
except ValueError as err:
raise ValueError(
"must be a number or comma-separated list of numbers."
) from err
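
Expected behaviour of the validator, as a quick sketch:

```python
# Strings, scalars, and lists all normalize to a list of floats:
assert parse_float_list("1.5") == [1.5]
assert parse_float_list("1,2,4") == [1.0, 2.0, 4.0]
assert parse_float_list(8) == [8.0]
assert parse_float_list([0.5, 2.0]) == [0.5, 2.0]
```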


T = TypeVar("T", bound="Scenario")


class Scenario(StandardBaseModel):
target: str

@classmethod
def from_file(
cls: type[T], filename: Union[str, Path], overrides: Optional[dict] = None
) -> T:
try:
with open(filename) as f:
if str(filename).endswith(".yaml") or str(filename).endswith(".yml"):
data = yaml.safe_load(f)
else: # Assume everything else is json
data = json.load(f)
except (json.JSONDecodeError, yaml.YAMLError) as e:
logger.error("Failed to parse scenario")
raise

data.update(overrides or {})
return cls.model_validate(data)
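
A hypothetical round-trip through `from_file`, with an override taking precedence over the file value (the file contents, including the synthetic-data spec for `data`, are illustrative):

```python
from pathlib import Path

import yaml

from guidellm.benchmark.scenario import GenerativeTextScenario

doc = {
    "target": "http://localhost:8000",
    "data": "prompt_tokens=256,output_tokens=128",
    "rate_type": "constant",
    "rate": "1,2,4",  # the BeforeValidator below expands this to [1.0, 2.0, 4.0]
}
path = Path("scenario.yaml")
path.write_text(yaml.safe_dump(doc))

# Overrides (e.g. from the CLI) win over values in the file.
scenario = GenerativeTextScenario.from_file(path, {"max_seconds": 60})
assert scenario.rate == [1.0, 2.0, 4.0]
assert scenario.max_seconds == 60
```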


class GenerativeTextScenario(Scenario):
# FIXME: This solves an issue with Pydantic and class types
class Config:
arbitrary_types_allowed = True

backend_type: BackendType = "openai_http"
backend_args: Optional[dict[str, Any]] = None
model: Optional[str] = None
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
processor_args: Optional[dict[str, Any]] = None
data: Union[
str,
Path,
Iterable[Union[str, dict[str, Any]]],
Dataset,
DatasetDict,
IterableDataset,
IterableDatasetDict,
]
data_args: Optional[dict[str, Any]] = None
data_sampler: Optional[Literal["random"]] = None
rate_type: Union[StrategyType, ProfileType]
rate: Annotated[
Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
] = None
max_seconds: Optional[PositiveFloat] = None
max_requests: Optional[PositiveInt] = None
warmup_percent: Optional[PositiveFloat] = None
cooldown_percent: Optional[PositiveFloat] = None
output_sampling: Optional[NonNegativeInt] = None
random_seed: int = 42
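
Aside on the FIXME above: the nested `class Config` still works in pydantic v2, but the same setting is usually spelled with `ConfigDict`; a sketch of the equivalent, not part of this PR:

```python
from pydantic import BaseModel, ConfigDict


class GenerativeTextScenarioV2(BaseModel):
    # Equivalent to `class Config: arbitrary_types_allowed = True`.
    model_config = ConfigDict(arbitrary_types_allowed=True)
```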