
Commit 5ac18e3

Authored Mar 15, 2025
Merge pull request #29 from pytorch/add-rocm-benchmark
Tweak the benchmark script to run on ROCm
2 parents dc3a901 + b66d76e commit 5ac18e3

File tree: 3 files changed, +63 -23 lines changed

vllm-benchmarks/cron.sh (+21 -9)
```diff
@@ -27,14 +27,14 @@ run() {
 
   NOT_EXIST=0
 
-  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/benchmark_results.json"
+  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/${GPU_DEVICE}/benchmark_results.json"
   aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
   if [[ ${NOT_EXIST:-0} == "0" ]]; then
     echo "${COMMIT}" > commit
     echo "Mark ${COMMIT} as the latest commit that has been benchmarked on main"
 
-    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
+    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
     aws s3 cp commit "s3://ossci-benchmarks/${S3_PATH}"
   fi
 }
@@ -47,18 +47,30 @@ run_benchmarks() {
 
   rm commit || true
   # Get the last green commit from S3
-  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
-  aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
-  LAST_GREEN_COMMIT=$(cat commit)
+  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
+  aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
+
+  if [[ ${NOT_EXIST:-0} == "0" ]]; then
+    aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
+    LAST_GREEN_COMMIT=$(cat commit)
 
-  if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
-    echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+    if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
+      echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+    else
+      COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
+      echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    fi
   else
-    COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
-    echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    run "${HEAD_SHA}"
   fi
 }
 
+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi
+
 while :
 do
   pull_vllm
```
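The new detection block at the bottom is what feeds `${GPU_DEVICE}` into every S3 path above; the second hunk also makes the script fall back to benchmarking `HEAD_SHA` directly when no per-device last-green-commit marker exists yet, i.e. on the first run for a new device type. A minimal sketch of the name parsing, assuming a typical `nvidia-smi` name string (exact wording varies by driver and model):

```bash
# echo stands in for nvidia-smi here so the sketch runs anywhere;
# awk '{print $2}' keeps only the second whitespace-separated field.
echo "NVIDIA H100 80GB HBM3" | awk '{print $2}'   # prints "H100"
```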

vllm-benchmarks/run.sh (+22 -9)
```diff
@@ -12,9 +12,6 @@ cleanup() {
   if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then
     rm -rf vllm/benchmarks/results
   fi
-
-  # https://github.com/vllm-project/vllm/issues/13392
-  rm -rf ~/.cache/vllm/torch_compile_cache
 }
 
 setup_vllm() {
@@ -43,8 +40,15 @@ build_vllm() {
   # TODO (huydhn) I'll setup remote cache for this later
   SCCACHE_CACHE_SIZE=100G sccache --start-server || true
   # Build and install vLLM
-  pip install -r requirements-build.txt
-  pip install --editable .
+  if command -v nvidia-smi; then
+    pip install -r requirements/build.txt
+    pip install --editable .
+  elif command -v amd-smi; then
+    pip install -r requirements/rocm.txt
+    pip install -r requirements/rocm-build.txt
+    # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm
+    PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop
+  fi
   popd
 }
 
@@ -65,19 +69,22 @@ run_benchmark() {
 upload_results() {
   if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then
     # Upload the benchmark results
-    python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results
+    python upload_benchmark_results.py \
+      --vllm vllm \
+      --benchmark-results vllm/benchmarks/results \
+      --device "${GPU_DEVICE}"
 
     pushd vllm
     if [[ -f benchmarks/results/benchmark_results.md ]]; then
       # Upload the markdown file
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.md"
       aws s3 cp --acl public-read \
         benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}"
     fi
 
     if [[ -f benchmarks.log ]]; then
       # Upload the logs
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmarks.log"
       aws s3 cp --acl public-read \
         benchmarks.log "s3://ossci-benchmarks/${S3_PATH}"
     fi
@@ -99,7 +106,13 @@ pushd vllm
 export HEAD_BRANCH=main
 export HEAD_SHA=$(git rev-parse --verify HEAD)
 
-S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json"
+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi
+
+S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.json"
 aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
 if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then
```
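On the ROCm path the build pins `PYTORCH_ROCM_ARCH` to gfx90a and gfx942, which correspond to the MI200 and MI300 Instinct generations, and installs via `setup.py develop` as the linked vLLM ROCm install docs describe. A hedged sketch of deriving the target from the local GPU instead of pinning it, assuming `rocminfo`'s usual gfx-target output (the grep pattern is an assumption; the pinned list stays as the fallback):

```bash
# Pick the first gfx target reported by rocminfo, falling back to the
# pinned MI200/MI300 list when nothing is detected.
ROCM_ARCH=$(rocminfo 2>/dev/null | grep -o 'gfx[0-9a-f]\+' | head -1)
PYTORCH_ROCM_ARCH="${ROCM_ARCH:-gfx90a;gfx942}" python setup.py develop
```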

vllm-benchmarks/upload_benchmark_results.py (+20 -5)
```diff
@@ -61,6 +61,12 @@ def parse_args() -> Any:
         default="ossci-benchmarks",
         help="the S3 bucket to upload the benchmark results",
     )
+    parser.add_argument(
+        "--device",
+        type=str,
+        required=True,
+        help="the name of the GPU device coming from nvidia-smi or amd-smi",
+    )
     parser.add_argument(
         "--dry-run",
         action="store_true",
@@ -94,10 +100,13 @@ def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]:
 
 
 def get_runner_info() -> Dict[str, Any]:
+    if torch.cuda.is_available() and torch.version.hip:
+        name = "rocm"
+    elif torch.cuda.is_available() and torch.version.cuda:
+        name = "cuda"
+
     return {
-        # TODO (huydhn): Figure out a better way to set the name here without
-        # hard coding it to cuda
-        "name": "cuda",
+        "name": name,
         "type": torch.cuda.get_device_name(),
         "cpu_info": platform.processor(),
         "cpu_count": psutil.cpu_count(),
```
```diff
@@ -155,9 +164,10 @@ def upload_to_s3(
     head_branch: str,
     head_sha: str,
     aggregated_results: List[Dict[str, Any]],
+    device: str,
     dry_run: bool = True,
 ) -> None:
-    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json"
+    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/{device}/benchmark_results.json"
     info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}")
     if not dry_run:
         # Write in JSONEachRow format
@@ -184,7 +194,12 @@ def main() -> None:
     # Extract and aggregate the benchmark results
     aggregated_results = aggregate(metadata, runner, load(args.benchmark_results))
     upload_to_s3(
-        args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run
+        args.s3_bucket,
+        head_branch,
+        head_sha,
+        aggregated_results,
+        args.device,
+        args.dry_run,
     )
 
 
```
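With `--device` threaded through to `upload_to_s3()`, results for the same commit land under per-device keys instead of overwriting one another. A sketch of the resulting bucket layout, with a hypothetical SHA and device names:

```bash
# Hypothetical keys after benchmarking one commit on two device types:
#   v3/vllm-project/vllm/main/<sha>/H100/benchmark_results.json
#   v3/vllm-project/vllm/main/<sha>/MI300/benchmark_results.json
aws s3 ls --recursive "s3://ossci-benchmarks/v3/vllm-project/vllm/main/"
```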