
Commit 149fcbc

Merge pull request #26 from pytorch/add-v1-benchmark-script
Add a script to run vLLM benchmark
2 parents ba5a8d1 + af09643 commit 149fcbc

4 files changed: +345 −0 lines changed

Diff for: vllm-benchmarks/README.md

+43
@@ -0,0 +1,43 @@
### Prerequisite

Prepare your HuggingFace token and save it into the `HF_TOKEN` environment
variable. Note that the token needs to have accepted the terms and conditions
of all the test models in
[vLLM](https://github.com/vllm-project/vllm/tree/main/.buildkite/nightly-benchmarks/tests);
otherwise, those models will be skipped.

It's recommended to have ccache or sccache set up, as building vLLM can
take some time.

### vLLM benchmark on PyTorch infra

* Run the benchmark on the latest commit of a branch, e.g. `main`

```
HF_TOKEN=<REDACTED> ./run.sh main
```

* Run the benchmark on a specific commit of [vLLM](https://github.com/vllm-project/vllm)

```
HF_TOKEN=<REDACTED> ./run.sh <COMMIT_SHA>
```

* Run the benchmark, but don't upload the results to the PyTorch OSS
benchmark database

```
HF_TOKEN=<REDACTED> UPLOAD_BENCHMARK_RESULTS=0 ./run.sh main
```

* Run the benchmark on a commit even if it has already been run before

```
HF_TOKEN=<REDACTED> OVERWRITE_BENCHMARK_RESULTS=1 ./run.sh main
```

The results and other artifacts will be available at:

* Benchmark results in JSON: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmark_results.json`
* Benchmark results in Markdown: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmark_results.md`
* Benchmark logs: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmarks.log`
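
As a quick illustration of consuming these artifacts, here is a minimal sketch that fetches and parses the JSON results. It assumes the object is publicly readable and is stored as gzip-compressed JSON lines (one record per line), which is what the upload script in this commit writes; the branch and commit values are placeholders.

```python
# Hypothetical sketch: fetch and parse the uploaded benchmark results.
# Assumes the object is public-read and stored as gzip-encoded JSON lines
# (JSONEachRow), matching what upload_benchmark_results.py writes.
import json

import requests

BRANCH = "main"          # placeholder
COMMIT = "<COMMIT_SHA>"  # placeholder

url = (
    "https://ossci-benchmarks.s3.us-east-1.amazonaws.com"
    f"/v3/vllm-project/vllm/{BRANCH}/{COMMIT}/benchmark_results.json"
)

resp = requests.get(url, timeout=60)
resp.raise_for_status()

# requests transparently decodes the gzip Content-Encoding, so resp.text is
# plain JSON lines; parse each non-empty line into a dict.
records = [json.loads(line) for line in resp.text.splitlines() if line.strip()]
print(f"Loaded {len(records)} benchmark records")
```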

Diff for: vllm-benchmarks/requirements.txt

+6
@@ -0,0 +1,6 @@
GitPython==3.1.44
datasets==3.3.0
psutil==7.0.0
pynvml==12.0.0
boto3==1.36.21
awscli==1.37.21

Diff for: vllm-benchmarks/run.sh

+108
@@ -0,0 +1,108 @@
#!/bin/bash

set -eux

VLLM_COMMIT=${1:-}
if [[ -z "${VLLM_COMMIT:-}" ]]; then
  echo "Usage: ./run.sh VLLM_BRANCH_OR_COMMIT"
  exit 1
fi

cleanup() {
  if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then
    rm -rf vllm/benchmarks/results
  fi

  # https://github.com/vllm-project/vllm/issues/13392
  rm -rf ~/.cache/vllm/torch_compile_cache
}

setup_vllm() {
  # I'm doing the checkout step here so that this script can be run without GHA
  if [[ ! -d "vllm" ]]; then
    git clone https://github.com/vllm-project/vllm.git
  fi

  pushd vllm
  git checkout main
  git fetch origin && git pull origin main
  # TODO (huydhn): As this script is run periodically, we need to add a feature
  # to run the benchmark on all commits since the last run
  git checkout "${VLLM_COMMIT}"
  popd
}

build_vllm() {
  pushd vllm
  # TODO (huydhn): I'll set up a remote cache for this later
  SCCACHE_CACHE_SIZE=100G sccache --start-server || true
  # Build and install vLLM
  pip install -r requirements-build.txt
  pip install --editable .
  popd
}

run_benchmark() {
  pushd vllm
  # Is there a better way to know if we are running on devvm?
  if [[ "${CI:-}" != "true" ]]; then
    export http_proxy=http://fwdproxy:8080
    export https_proxy=http://fwdproxy:8080
    export no_proxy=".fbcdn.net,.facebook.com,.thefacebook.com,.tfbnw.net,.fb.com,.fb,localhost,127.0.0.1"
  fi

  ENGINE_VERSION=v1 SAVE_TO_PYTORCH_BENCHMARK_FORMAT=1 \
    bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh > benchmarks.log 2>&1
  popd
}

upload_results() {
  if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then
    # Upload the benchmark results
    python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results

    pushd vllm
    if [[ -f benchmarks/results/benchmark_results.md ]]; then
      # Upload the markdown file
      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md"
      aws s3 cp --acl public-read \
        benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}"
    fi

    if [[ -f benchmarks.log ]]; then
      # Upload the logs
      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log"
      aws s3 cp --acl public-read \
        benchmarks.log "s3://ossci-benchmarks/${S3_PATH}"
    fi
    popd
  fi
}

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Please set HF_TOKEN and accept all the benchmark models"
  exit 1
fi

pip install -r requirements.txt

cleanup
setup_vllm

pushd vllm
export HEAD_BRANCH=$(git rev-parse --abbrev-ref HEAD)
export HEAD_SHA=$(git rev-parse --verify HEAD)

S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json"
aws s3api head-object --bucket ossci-benchmarks --key "${S3_PATH}" || NOT_EXIST=1

if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then
  echo "Skip ${HEAD_SHA} because its benchmark results already exist at s3://ossci-benchmarks/${S3_PATH}"
  exit 0
fi
popd

build_vllm
run_benchmark
upload_results
cleanup
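
The script skips commits whose results are already uploaded by probing the S3 key with `aws s3api head-object`. Here is a minimal Python sketch of the same check using boto3; the bucket name and key layout come from the script above, while `HEAD_BRANCH` and `HEAD_SHA` are placeholders.

```python
# Minimal sketch of the "skip if results already exist" check, mirroring the
# `aws s3api head-object` call in run.sh. Bucket and key layout are taken from
# the script; HEAD_BRANCH / HEAD_SHA are placeholders here.
import boto3
from botocore.exceptions import ClientError

HEAD_BRANCH = "main"       # placeholder
HEAD_SHA = "<COMMIT_SHA>"  # placeholder

S3_PATH = f"v3/vllm-project/vllm/{HEAD_BRANCH}/{HEAD_SHA}/benchmark_results.json"


def results_already_uploaded(bucket: str = "ossci-benchmarks", key: str = S3_PATH) -> bool:
    """Return True if the benchmark results object already exists in S3."""
    try:
        boto3.client("s3").head_object(Bucket=bucket, Key=key)
        return True
    except ClientError:
        # head_object raises ClientError (e.g. 404) when the object is missing
        return False


if results_already_uploaded():
    print(f"Skip {HEAD_SHA}: results already exist at s3://ossci-benchmarks/{S3_PATH}")
```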

Diff for: vllm-benchmarks/upload_benchmark_results.py

+188
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

import glob
import gzip
import json
import logging
import os
import platform
import socket
import time
from argparse import Action, ArgumentParser, Namespace
from logging import info, warning
from typing import Any, Dict, List, Optional, Tuple

import boto3
import psutil
import torch
from git import Repo

logging.basicConfig(level=logging.INFO)


REPO = "vllm-project/vllm"


class ValidateDir(Action):
    def __call__(
        self,
        parser: ArgumentParser,
        namespace: Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        if os.path.isdir(values):
            setattr(namespace, self.dest, values)
            return

        parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
    parser = ArgumentParser("Upload vLLM benchmark results to S3")
    parser.add_argument(
        "--vllm",
        type=str,
        required=True,
        action=ValidateDir,
        help="the directory where the vllm repo is checked out",
    )
    parser.add_argument(
        "--benchmark-results",
        type=str,
        required=True,
        action=ValidateDir,
        help="the directory with the benchmark results",
    )
    parser.add_argument(
        "--s3-bucket",
        type=str,
        required=False,
        default="ossci-benchmarks",
        help="the S3 bucket to upload the benchmark results to",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
    )

    return parser.parse_args()


def get_git_metadata(vllm_dir: str) -> Tuple[str, str]:
    repo = Repo(vllm_dir)
    return repo.active_branch.name, repo.head.object.hexsha


def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]:
    timestamp = int(time.time())
    return {
        "timestamp": timestamp,
        "schema_version": "v3",
        "name": "vLLM benchmark",
        "repo": REPO,
        "head_branch": head_branch,
        "head_sha": head_sha,
        "workflow_id": os.getenv("WORKFLOW_ID", timestamp),
        "run_attempt": os.getenv("RUN_ATTEMPT", 1),
        "job_id": os.getenv("JOB_ID", timestamp),
    }


def get_runner_info() -> Dict[str, Any]:
    return {
        # TODO (huydhn): Figure out a better way to set the name here without
        # hard coding it to cuda
        "name": "cuda",
        "type": torch.cuda.get_device_name(),
        "cpu_info": platform.processor(),
        "cpu_count": psutil.cpu_count(),
        "avail_mem_in_gb": int(psutil.virtual_memory().total / (1024 * 1024 * 1024)),
        "gpu_info": torch.cuda.get_device_name(),
        "gpu_count": torch.cuda.device_count(),
        "avail_gpu_mem_in_gb": int(
            torch.cuda.get_device_properties(0).total_memory / (1024 * 1024 * 1024)
        ),
        "extra_info": {
            "hostname": socket.gethostname(),
        },
    }


def load(benchmark_results: str) -> Dict[str, List]:
    results = {}

    for file in glob.glob(f"{benchmark_results}/*.json"):
        filename = os.path.basename(file)
        with open(file) as f:
            try:
                r = json.load(f)
            except json.JSONDecodeError as e:
                warning(f"Failed to load {file}: {e}")
                continue

            if not r:
                warning(f"Found no benchmark results in {file}")
                continue

            if type(r) is not list or "benchmark" not in r[0]:
                warning(f"Found no PyTorch benchmark results in {file}")
                continue

            results[filename] = r

    return results


def aggregate(
    metadata: Dict[str, Any], runner: Dict[str, Any], benchmark_results: Dict[str, List]
) -> List[Dict[str, Any]]:
    aggregated_results = []
    for _, results in benchmark_results.items():
        for result in results:
            r: Dict[str, Any] = {**metadata, **result}
            r["runners"] = [runner]
            aggregated_results.append(r)
    return aggregated_results


def upload_to_s3(
    s3_bucket: str,
    head_branch: str,
    head_sha: str,
    aggregated_results: List[Dict[str, Any]],
    dry_run: bool = True,
) -> None:
    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json"
    info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}")
    if not dry_run:
        # Write in JSONEachRow format
        data = "\n".join([json.dumps(r) for r in aggregated_results])
        boto3.resource("s3").Object(
            f"{s3_bucket}",
            f"{s3_path}",
        ).put(
            ACL="public-read",
            Body=gzip.compress(data.encode()),
            ContentEncoding="gzip",
            ContentType="application/json",
        )


def main() -> None:
    args = parse_args()

    head_branch, head_sha = get_git_metadata(args.vllm)
    # Gather some information about the benchmark
    metadata = get_benchmark_metadata(head_branch, head_sha)
    runner = get_runner_info()

    # Extract and aggregate the benchmark results
    aggregated_results = aggregate(metadata, runner, load(args.benchmark_results))
    upload_to_s3(
        args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run
    )


if __name__ == "__main__":
    main()
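
For reference, here is a hedged sketch of the record shape that `aggregate()` produces and `upload_to_s3()` serializes. The benchmark and metric fields below are hypothetical placeholders; only the metadata merge and the JSONEachRow-plus-gzip handling mirror the script above.

```python
# Hedged illustration of the record produced by aggregate() and written by
# upload_to_s3(). The benchmark/metric contents are hypothetical placeholders;
# the merge-and-serialize behavior mirrors the script.
import gzip
import json

metadata = {
    "schema_version": "v3",
    "repo": "vllm-project/vllm",
    "head_branch": "main",          # placeholder
    "head_sha": "<COMMIT_SHA>",     # placeholder
}
runner = {"name": "cuda", "gpu_count": 1}

# One entry as it might appear in vllm/benchmarks/results/*.json (hypothetical)
result = {"benchmark": {"name": "serving"}, "metric": {"name": "latency"}}

# aggregate(): merge metadata into each result and attach the runner info
record = {**metadata, **result}
record["runners"] = [runner]

# upload_to_s3(): one JSON object per line (JSONEachRow), gzip-compressed
payload = gzip.compress("\n".join(json.dumps(r) for r in [record]).encode())
print(len(payload), "compressed bytes")
```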
