Tweak the benchmark script to run on ROCm #29

Merged · 9 commits · Mar 15, 2025
30 changes: 21 additions & 9 deletions vllm-benchmarks/cron.sh

@@ -27,14 +27,14 @@ run() {

   NOT_EXIST=0

-  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/benchmark_results.json"
+  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/${GPU_DEVICE}/benchmark_results.json"
   aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1

   if [[ ${NOT_EXIST:-0} == "0" ]]; then
     echo "${COMMIT}" > commit
     echo "Mark ${COMMIT} as the latest commit that has been benchmarked on main"

-    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
+    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
     aws s3 cp commit "s3://ossci-benchmarks/${S3_PATH}"
   fi
 }
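With the device segment in the key, each GPU backend now tracks its own results and its own last-green marker. As a hypothetical example, assuming ${GPU_DEVICE} resolves to H100 on an NVIDIA runner, a commit's results move from

v3/vllm-project/vllm/main/${COMMIT}/benchmark_results.json

to

v3/vllm-project/vllm/main/${COMMIT}/H100/benchmark_results.json

so a CUDA run and a ROCm run of the same commit no longer overwrite each other.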
@@ -47,18 +47,30 @@ run_benchmarks() {

   rm commit || true
   # Get the last green commit from S3
-  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
-  aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
-  LAST_GREEN_COMMIT=$(cat commit)
+  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
+  aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1

-  if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
-    echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+  if [[ ${NOT_EXIST:-0} == "0" ]]; then
+    aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
+    LAST_GREEN_COMMIT=$(cat commit)
+
+    if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
+      echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+    else
+      COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
+      echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    fi
   else
-    COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
-    echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    run "${HEAD_SHA}"
   fi
 }

+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi

while :
do
pull_vllm
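The GPU_DEVICE detection added above keys everything off the second whitespace-separated field of the device name. A hypothetical transcript on an NVIDIA H100 runner (the exact marketing string varies by SKU, so treat the output as an assumption):

$ nvidia-smi -i 0 --query-gpu=name --format=csv,noheader
NVIDIA H100 80GB HBM3
$ nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}'
H100

The amd-smi branch works the same way, taking the second field of the MARKET_NAME line printed by amd-smi static -g 0 -a; whatever token comes out becomes the device segment in every S3 key above.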
31 changes: 22 additions & 9 deletions vllm-benchmarks/run.sh

@@ -12,9 +12,6 @@ cleanup() {
if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then
rm -rf vllm/benchmarks/results
fi

# https://github.com/vllm-project/vllm/issues/13392
rm -rf ~/.cache/vllm/torch_compile_cache
}

setup_vllm() {
@@ -43,8 +40,15 @@ build_vllm() {
   # TODO (huydhn) I'll setup remote cache for this later
   SCCACHE_CACHE_SIZE=100G sccache --start-server || true
   # Build and install vLLM
-  pip install -r requirements-build.txt
-  pip install --editable .
+  if command -v nvidia-smi; then
+    pip install -r requirements/build.txt
+    pip install --editable .
+  elif command -v amd-smi; then
+    pip install -r requirements/rocm.txt
+    pip install -r requirements/rocm-build.txt
+    # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm
+    PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop
+  fi
   popd
 }
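On the ROCm path, gfx90a and gfx942 are the LLVM targets for the AMD Instinct MI200 and MI300 series respectively, so a single build covers both generations. A runner with a different card would need its target appended to the list; a hypothetical sketch for a Radeon RX 7900-class (gfx1100) machine, not part of this PR:

PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx1100" python setup.py develop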

@@ -65,19 +69,22 @@ run_benchmark() {
 upload_results() {
   if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then
     # Upload the benchmark results
-    python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results
+    python upload_benchmark_results.py \
+      --vllm vllm \
+      --benchmark-results vllm/benchmarks/results \
+      --device "${GPU_DEVICE}"

     pushd vllm
     if [[ -f benchmarks/results/benchmark_results.md ]]; then
       # Upload the markdown file
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.md"
       aws s3 cp --acl public-read \
         benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}"
     fi

     if [[ -f benchmarks.log ]]; then
       # Upload the logs
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmarks.log"
       aws s3 cp --acl public-read \
         benchmarks.log "s3://ossci-benchmarks/${S3_PATH}"
     fi
@@ -99,7 +106,13 @@ pushd vllm
 export HEAD_BRANCH=main
 export HEAD_SHA=$(git rev-parse --verify HEAD)

-S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json"
+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi
+
+S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.json"
 aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1

if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then
25 changes: 20 additions & 5 deletions vllm-benchmarks/upload_benchmark_results.py

@@ -61,6 +61,12 @@ def parse_args() -> Any:
default="ossci-benchmarks",
help="the S3 bucket to upload the benchmark results",
)
parser.add_argument(
"--device",
type=str,
required=True,
help="the name of the GPU device coming from nvidia-smi or amd-smi",
)
parser.add_argument(
"--dry-run",
action="store_true",
Expand Down Expand Up @@ -94,10 +100,13 @@ def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]:


 def get_runner_info() -> Dict[str, Any]:
+    if torch.cuda.is_available() and torch.version.hip:
+        name = "rocm"
+    elif torch.cuda.is_available() and torch.version.cuda:
+        name = "cuda"
+
     return {
-        # TODO (huydhn): Figure out a better way to set the name here without
-        # hard coding it to cuda
-        "name": "cuda",
+        "name": name,
         "type": torch.cuda.get_device_name(),
         "cpu_info": platform.processor(),
         "cpu_count": psutil.cpu_count(),
@@ -155,9 +164,10 @@ def upload_to_s3(
     head_branch: str,
     head_sha: str,
     aggregated_results: List[Dict[str, Any]],
+    device: str,
     dry_run: bool = True,
 ) -> None:
-    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json"
+    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/{device}/benchmark_results.json"
     info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}")
     if not dry_run:
         # Write in JSONEachRow format
@@ -184,7 +194,12 @@ def main() -> None:
     # Extract and aggregate the benchmark results
     aggregated_results = aggregate(metadata, runner, load(args.benchmark_results))
     upload_to_s3(
-        args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run
+        args.s3_bucket,
+        head_branch,
+        head_sha,
+        aggregated_results,
+        args.device,
+        args.dry_run,
     )
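End to end, the uploader now requires the device segment. A dry-run invocation mirroring run.sh above, with H100 standing in for whatever GPU_DEVICE resolves to (an assumed value):

python upload_benchmark_results.py \
    --vllm vllm \
    --benchmark-results vllm/benchmarks/results \
    --device H100 \
    --dry-run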

