
Commit 149fcbc

Merge pull request #26 from pytorch/add-v1-benchmark-script
Add a script to run vLLM benchmark
2 parents ba5a8d1 + af09643 commit 149fcbc

4 files changed: +345 −0 lines changed

Diff for: vllm-benchmarks/README.md

+43
@@ -0,0 +1,43 @@
### Prerequisite

Prepare your HuggingFace token and save it into the `HF_TOKEN` environment
variable. Note that the token needs to have accepted the terms and conditions
of all the test models in
[vLLM](https://github.com/vllm-project/vllm/tree/main/.buildkite/nightly-benchmarks/tests);
otherwise, those models will be skipped.

It's recommended to have ccache or sccache set up, as building vLLM can
take some time.

### vLLM benchmark on PyTorch infra

* Run the benchmark on the latest commit of a branch, e.g. `main`

```
HF_TOKEN=<REDACTED> ./run.sh main
```

* Run the benchmark on a specific commit of [vLLM](https://github.com/vllm-project/vllm)

```
HF_TOKEN=<REDACTED> ./run.sh <COMMIT_SHA>
```

* Run the benchmark, but don't upload the results to the PyTorch OSS
benchmark database

```
HF_TOKEN=<REDACTED> UPLOAD_BENCHMARK_RESULTS=0 ./run.sh main
```

* Run the benchmark on a commit even if it has already been run before

```
HF_TOKEN=<REDACTED> OVERWRITE_BENCHMARK_RESULTS=1 ./run.sh main
```

The results and other artifacts will be available at:

* Benchmark results in JSON: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmark_results.json`
* Benchmark results in Markdown: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmark_results.md`
* Benchmark logs: `https://ossci-benchmarks.s3.us-east-1.amazonaws.com/v3/vllm-project/vllm/<BRANCH>/<COMMIT>/benchmarks.log`
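
As a quick illustration of consuming these artifacts, here is a minimal sketch that fetches and parses the JSON results. It assumes the object is publicly readable and is stored as gzip-compressed JSON lines (one record per line), which is what the upload script in this commit writes; the branch and commit values are placeholders.

```python
# Hypothetical sketch: fetch and parse the uploaded benchmark results.
# Assumes the object is public-read and stored as gzip-encoded JSON lines
# (JSONEachRow), matching what upload_benchmark_results.py writes.
import json

import requests

BRANCH = "main"          # placeholder
COMMIT = "<COMMIT_SHA>"  # placeholder

url = (
    "https://ossci-benchmarks.s3.us-east-1.amazonaws.com"
    f"/v3/vllm-project/vllm/{BRANCH}/{COMMIT}/benchmark_results.json"
)

resp = requests.get(url, timeout=60)
resp.raise_for_status()

# requests transparently decodes the gzip Content-Encoding, so resp.text is
# plain JSON lines; parse each non-empty line into a dict.
records = [json.loads(line) for line in resp.text.splitlines() if line.strip()]
print(f"Loaded {len(records)} benchmark records")
```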

Diff for: vllm-benchmarks/requirements.txt

+6
@@ -0,0 +1,6 @@
GitPython==3.1.44
datasets==3.3.0
psutil==7.0.0
pynvml==12.0.0
boto3==1.36.21
awscli==1.37.21

Diff for: vllm-benchmarks/run.sh

+108
@@ -0,0 +1,108 @@
#!/bin/bash

set -eux

VLLM_COMMIT=${1:-}
if [[ -z "${VLLM_COMMIT:-}" ]]; then
  echo "Usage: ./run.sh VLLM_BRANCH_OR_COMMIT"
  exit 1
fi

cleanup() {
  if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then
    rm -rf vllm/benchmarks/results
  fi

  # https://github.com/vllm-project/vllm/issues/13392
  rm -rf ~/.cache/vllm/torch_compile_cache
}

setup_vllm() {
  # I'm doing the checkout step here so that this script can be run without GHA
  if [[ ! -d "vllm" ]]; then
    git clone https://github.com/vllm-project/vllm.git
  fi

  pushd vllm
  git checkout main
  git fetch origin && git pull origin main
  # TODO (huydhn): As this script is run periodically, we need to add a feature
  # to run the benchmark on all commits since the last run
  git checkout "${VLLM_COMMIT}"
  popd
}

build_vllm() {
  pushd vllm
  # TODO (huydhn): I'll set up a remote cache for this later
  SCCACHE_CACHE_SIZE=100G sccache --start-server || true
  # Build and install vLLM
  pip install -r requirements-build.txt
  pip install --editable .
  popd
}

run_benchmark() {
  pushd vllm
  # Is there a better way to know if we are running on devvm?
  if [[ "${CI:-}" != "true" ]]; then
    export http_proxy=http://fwdproxy:8080
    export https_proxy=http://fwdproxy:8080
    export no_proxy=".fbcdn.net,.facebook.com,.thefacebook.com,.tfbnw.net,.fb.com,.fb,localhost,127.0.0.1"
  fi

  ENGINE_VERSION=v1 SAVE_TO_PYTORCH_BENCHMARK_FORMAT=1 \
    bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh > benchmarks.log 2>&1
  popd
}

upload_results() {
  if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then
    # Upload the benchmark results
    python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results

    pushd vllm
    if [[ -f benchmarks/results/benchmark_results.md ]]; then
      # Upload the markdown file
      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md"
      aws s3 cp --acl public-read \
        benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}"
    fi

    if [[ -f benchmarks.log ]]; then
      # Upload the logs
      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log"
      aws s3 cp --acl public-read \
        benchmarks.log "s3://ossci-benchmarks/${S3_PATH}"
    fi
    popd
  fi
}

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Please set HF_TOKEN and accept all the benchmark models"
  exit 1
fi

pip install -r requirements.txt

cleanup
setup_vllm

pushd vllm
export HEAD_BRANCH=$(git rev-parse --abbrev-ref HEAD)
export HEAD_SHA=$(git rev-parse --verify HEAD)

S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json"
aws s3api head-object --bucket ossci-benchmarks --key "${S3_PATH}" || NOT_EXIST=1

if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then
  echo "Skip ${HEAD_SHA} because its benchmark results already exist at s3://ossci-benchmarks/${S3_PATH}"
  exit 0
fi
popd

build_vllm
run_benchmark
upload_results
cleanup
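
The script skips commits whose results are already uploaded by probing the S3 key with `aws s3api head-object`. Here is a minimal Python sketch of the same check using boto3; the bucket name and key layout come from the script above, while `HEAD_BRANCH` and `HEAD_SHA` are placeholders.

```python
# Minimal sketch of the "skip if results already exist" check, mirroring the
# `aws s3api head-object` call in run.sh. Bucket and key layout are taken from
# the script; HEAD_BRANCH / HEAD_SHA are placeholders here.
import boto3
from botocore.exceptions import ClientError

HEAD_BRANCH = "main"       # placeholder
HEAD_SHA = "<COMMIT_SHA>"  # placeholder

S3_PATH = f"v3/vllm-project/vllm/{HEAD_BRANCH}/{HEAD_SHA}/benchmark_results.json"


def results_already_uploaded(bucket: str = "ossci-benchmarks", key: str = S3_PATH) -> bool:
    """Return True if the benchmark results object already exists in S3."""
    try:
        boto3.client("s3").head_object(Bucket=bucket, Key=key)
        return True
    except ClientError:
        # head_object raises ClientError (e.g. 404) when the object is missing
        return False


if results_already_uploaded():
    print(f"Skip {HEAD_SHA}: results already exist at s3://ossci-benchmarks/{S3_PATH}")
```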

Diff for: vllm-benchmarks/upload_benchmark_results.py

+188
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

import glob
import gzip
import json
import logging
import os
import platform
import socket
import time
from argparse import Action, ArgumentParser, Namespace
from logging import info, warning
from typing import Any, Dict, List, Optional, Tuple

import boto3
import psutil
import torch
from git import Repo

logging.basicConfig(level=logging.INFO)


REPO = "vllm-project/vllm"


class ValidateDir(Action):
    def __call__(
        self,
        parser: ArgumentParser,
        namespace: Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        if os.path.isdir(values):
            setattr(namespace, self.dest, values)
            return

        parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
    parser = ArgumentParser("Upload vLLM benchmark results to S3")
    parser.add_argument(
        "--vllm",
        type=str,
        required=True,
        action=ValidateDir,
        help="the directory where the vllm repo is checked out",
    )
    parser.add_argument(
        "--benchmark-results",
        type=str,
        required=True,
        action=ValidateDir,
        help="the directory with the benchmark results",
    )
    parser.add_argument(
        "--s3-bucket",
        type=str,
        required=False,
        default="ossci-benchmarks",
        help="the S3 bucket to upload the benchmark results to",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
    )

    return parser.parse_args()


def get_git_metadata(vllm_dir: str) -> Tuple[str, str]:
    repo = Repo(vllm_dir)
    return repo.active_branch.name, repo.head.object.hexsha


def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]:
    timestamp = int(time.time())
    return {
        "timestamp": timestamp,
        "schema_version": "v3",
        "name": "vLLM benchmark",
        "repo": REPO,
        "head_branch": head_branch,
        "head_sha": head_sha,
        "workflow_id": os.getenv("WORKFLOW_ID", timestamp),
        "run_attempt": os.getenv("RUN_ATTEMPT", 1),
        "job_id": os.getenv("JOB_ID", timestamp),
    }


def get_runner_info() -> Dict[str, Any]:
    return {
        # TODO (huydhn): Figure out a better way to set the name here without
        # hard coding it to cuda
        "name": "cuda",
        "type": torch.cuda.get_device_name(),
        "cpu_info": platform.processor(),
        "cpu_count": psutil.cpu_count(),
        "avail_mem_in_gb": int(psutil.virtual_memory().total / (1024 * 1024 * 1024)),
        "gpu_info": torch.cuda.get_device_name(),
        "gpu_count": torch.cuda.device_count(),
        "avail_gpu_mem_in_gb": int(
            torch.cuda.get_device_properties(0).total_memory / (1024 * 1024 * 1024)
        ),
        "extra_info": {
            "hostname": socket.gethostname(),
        },
    }


def load(benchmark_results: str) -> Dict[str, List]:
    results = {}

    for file in glob.glob(f"{benchmark_results}/*.json"):
        filename = os.path.basename(file)
        with open(file) as f:
            try:
                r = json.load(f)
            except json.JSONDecodeError as e:
                warning(f"Failed to load {file}: {e}")
                continue

            if not r:
                warning(f"Found no benchmark results in {file}")
                continue

            if type(r) is not list or "benchmark" not in r[0]:
                warning(f"Found no PyTorch benchmark results in {file}")
                continue

            results[filename] = r

    return results


def aggregate(
    metadata: Dict[str, Any], runner: Dict[str, Any], benchmark_results: Dict[str, List]
) -> List[Dict[str, Any]]:
    aggregated_results = []
    for _, results in benchmark_results.items():
        for result in results:
            r: Dict[str, Any] = {**metadata, **result}
            r["runners"] = [runner]
            aggregated_results.append(r)
    return aggregated_results


def upload_to_s3(
    s3_bucket: str,
    head_branch: str,
    head_sha: str,
    aggregated_results: List[Dict[str, Any]],
    dry_run: bool = True,
) -> None:
    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json"
    info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}")
    if not dry_run:
        # Write in JSONEachRow format
        data = "\n".join([json.dumps(r) for r in aggregated_results])
        boto3.resource("s3").Object(
            f"{s3_bucket}",
            f"{s3_path}",
        ).put(
            ACL="public-read",
            Body=gzip.compress(data.encode()),
            ContentEncoding="gzip",
            ContentType="application/json",
        )


def main() -> None:
    args = parse_args()

    head_branch, head_sha = get_git_metadata(args.vllm)
    # Gather some information about the benchmark
    metadata = get_benchmark_metadata(head_branch, head_sha)
    runner = get_runner_info()

    # Extract and aggregate the benchmark results
    aggregated_results = aggregate(metadata, runner, load(args.benchmark_results))
    upload_to_s3(
        args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run
    )


if __name__ == "__main__":
    main()
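
For reference, here is a hedged sketch of the record shape that `aggregate()` produces and `upload_to_s3()` serializes. The benchmark and metric fields below are hypothetical placeholders; only the metadata merge and the JSONEachRow-plus-gzip handling mirror the script above.

```python
# Hedged illustration of the record produced by aggregate() and written by
# upload_to_s3(). The benchmark/metric contents are hypothetical placeholders;
# the merge-and-serialize behavior mirrors the script.
import gzip
import json

metadata = {
    "schema_version": "v3",
    "repo": "vllm-project/vllm",
    "head_branch": "main",          # placeholder
    "head_sha": "<COMMIT_SHA>",     # placeholder
}
runner = {"name": "cuda", "gpu_count": 1}

# One entry as it might appear in vllm/benchmarks/results/*.json (hypothetical)
result = {"benchmark": {"name": "serving"}, "metric": {"name": "latency"}}

# aggregate(): merge metadata into each result and attach the runner info
record = {**metadata, **result}
record["runners"] = [runner]

# upload_to_s3(): one JSON object per line (JSONEachRow), gzip-compressed
payload = gzip.compress("\n".join(json.dumps(r) for r in [record]).encode())
print(len(payload), "compressed bytes")
```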
