From eeb75357b3b0a8c886c5f8bb9b946fb4cf509785 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 4 Mar 2025 18:50:40 -0800 Subject: [PATCH 1/9] Tweak the benchmark script to run on ROCm --- vllm-benchmarks/cron.sh | 12 +++++++++--- vllm-benchmarks/run.sh | 26 ++++++++++++++++++-------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/vllm-benchmarks/cron.sh b/vllm-benchmarks/cron.sh index 5e5721f..c5e25f7 100755 --- a/vllm-benchmarks/cron.sh +++ b/vllm-benchmarks/cron.sh @@ -27,14 +27,14 @@ run() { NOT_EXIST=0 - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/benchmark_results.json" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/${GPU_DEVICE}/benchmark_results.json" aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 if [[ ${NOT_EXIST:-0} == "0" ]]; then echo "${COMMIT}" > commit echo "Mark ${COMMIT} as the latest commit that has been benchmarked on main" - S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit" + S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit" aws s3 cp commit "s3://ossci-benchmarks/${S3_PATH}" fi } @@ -47,7 +47,7 @@ run_benchmarks() { rm commit || true # Get the last green commit from S3 - S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit" + S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit" aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" . LAST_GREEN_COMMIT=$(cat commit) @@ -59,6 +59,12 @@ run_benchmarks() { fi } +if command -v nvidia-smi; then + declare -g GPU_DEVICE=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}') +elif command -v amd-smi; then + declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') +fi + while : do pull_vllm diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 680e187..775a06f 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -12,9 +12,6 @@ cleanup() { if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then rm -rf vllm/benchmarks/results fi - - # https://github.com/vllm-project/vllm/issues/13392 - rm -rf ~/.cache/vllm/torch_compile_cache } setup_vllm() { @@ -43,8 +40,15 @@ build_vllm() { # TODO (huydhn) I'll setup remote cache for this later SCCACHE_CACHE_SIZE=100G sccache --start-server || true # Build and install vLLM - pip install -r requirements-build.txt - pip install --editable . + if command -v nvidia-smi; then + pip install -r requirements-build.txt + pip install --editable . + elif command -v amd-smi; then + pip install -r requirements-rocm.txt + pip install -r requirements-rocm-build.txt + # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm + PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop + fi popd } @@ -70,14 +74,14 @@ upload_results() { pushd vllm if [[ -f benchmarks/results/benchmark_results.md ]]; then # Upload the markdown file - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.md" aws s3 cp --acl public-read \ benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}" fi if [[ -f benchmarks.log ]]; then # Upload the logs - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmarks.log" aws s3 cp --acl public-read \ benchmarks.log "s3://ossci-benchmarks/${S3_PATH}" fi @@ -99,7 +103,13 @@ pushd vllm export HEAD_BRANCH=main export HEAD_SHA=$(git rev-parse --verify HEAD) -S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json" +if command -v nvidia-smi; then + declare -g GPU_DEVICE=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}') +elif command -v amd-smi; then + declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') +fi + +S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.json" aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then From 36c1c33a3f8533ebb8e8472a9a13c81e480e4d7b Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 00:40:12 -0800 Subject: [PATCH 2/9] Benchmark the latest main commit if the last green commit doesn't exist --- vllm-benchmarks/cron.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vllm-benchmarks/cron.sh b/vllm-benchmarks/cron.sh index c5e25f7..060df79 100755 --- a/vllm-benchmarks/cron.sh +++ b/vllm-benchmarks/cron.sh @@ -48,14 +48,20 @@ run_benchmarks() { rm commit || true # Get the last green commit from S3 S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit" - aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" . - LAST_GREEN_COMMIT=$(cat commit) + aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 - if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then - echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked" + if [[ ${NOT_EXIST:-0} == "0" ]]; then + aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" . + LAST_GREEN_COMMIT=$(cat commit) + + if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then + echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked" + else + COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT}) + echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done + fi else - COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT}) - echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done + run main fi } From b25a2ec6a91562397bce0c502035407dd6089c46 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 00:58:21 -0800 Subject: [PATCH 3/9] Passing GPU_DEVICE to upload script --- vllm-benchmarks/run.sh | 5 ++++- vllm-benchmarks/upload_benchmark_results.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 775a06f..4065328 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -69,7 +69,10 @@ run_benchmark() { upload_results() { if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then # Upload the benchmark results - python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results + python upload_benchmark_results.py \ + --vllm vllm \ + --benchmark-results vllm/benchmarks/results \ + --device "${GPU_DEVICE}" pushd vllm if [[ -f benchmarks/results/benchmark_results.md ]]; then diff --git a/vllm-benchmarks/upload_benchmark_results.py b/vllm-benchmarks/upload_benchmark_results.py index 0c4ab22..f5f8956 100755 --- a/vllm-benchmarks/upload_benchmark_results.py +++ b/vllm-benchmarks/upload_benchmark_results.py @@ -61,6 +61,12 @@ def parse_args() -> Any: default="ossci-benchmarks", help="the S3 bucket to upload the benchmark results", ) + parser.add_argument( + "--device", + type=str, + required=True, + help="the name of the GPU device coming from nvidia-smi or amd-smi", + ) parser.add_argument( "--dry-run", action="store_true", @@ -155,9 +161,10 @@ def upload_to_s3( head_branch: str, head_sha: str, aggregated_results: List[Dict[str, Any]], + device: str, dry_run: bool = True, ) -> None: - s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json" + s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/{device}/benchmark_results.json" info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}") if not dry_run: # Write in JSONEachRow format @@ -184,7 +191,12 @@ def main() -> None: # Extract and aggregate the benchmark results aggregated_results = aggregate(metadata, runner, load(args.benchmark_results)) upload_to_s3( - args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run + args.s3_bucket, + head_branch, + head_sha, + aggregated_results, + args.device, + args.dry_run, ) From 05b991cff194d1b57d8dede9d9d5d79660050f37 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 01:36:04 -0800 Subject: [PATCH 4/9] Set the correct device name --- vllm-benchmarks/upload_benchmark_results.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/upload_benchmark_results.py b/vllm-benchmarks/upload_benchmark_results.py index f5f8956..0f05b40 100755 --- a/vllm-benchmarks/upload_benchmark_results.py +++ b/vllm-benchmarks/upload_benchmark_results.py @@ -100,10 +100,13 @@ def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]: def get_runner_info() -> Dict[str, Any]: + if torch.cuda.is_available() and torch.version.hip: + name = "rocm" + elif torch.cuda.is_available() and torch.version.cuda: + name = "cuda" + return { - # TODO (huydhn): Figure out a better way to set the name here without - # hard coding it to cuda - "name": "cuda", + "name": name, "type": torch.cuda.get_device_name(), "cpu_info": platform.processor(), "cpu_count": psutil.cpu_count(), From ecb9fd2bfee9eeeb1e93c03ddfb1217dde43d286 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 02:28:01 -0800 Subject: [PATCH 5/9] Another tweak --- vllm-benchmarks/cron.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-benchmarks/cron.sh b/vllm-benchmarks/cron.sh index 060df79..ec85266 100755 --- a/vllm-benchmarks/cron.sh +++ b/vllm-benchmarks/cron.sh @@ -61,7 +61,7 @@ run_benchmarks() { echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done fi else - run main + run "${HEAD_SHA}" fi } From a693dced3c1fd634b4aca8df97fda68a0bb86931 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 02:40:59 -0800 Subject: [PATCH 6/9] Only get the name of the first GPU --- vllm-benchmarks/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 4065328..8b12b30 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -107,7 +107,7 @@ export HEAD_BRANCH=main export HEAD_SHA=$(git rev-parse --verify HEAD) if command -v nvidia-smi; then - declare -g GPU_DEVICE=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}') + declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}' | head -n1) elif command -v amd-smi; then declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') fi From 696f35ab228e4744560fa03f5b409684d85cb5c0 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 02:42:15 -0800 Subject: [PATCH 7/9] Remove redundant code --- vllm-benchmarks/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 8b12b30..3593779 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -107,7 +107,7 @@ export HEAD_BRANCH=main export HEAD_SHA=$(git rev-parse --verify HEAD) if command -v nvidia-smi; then - declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}' | head -n1) + declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}') elif command -v amd-smi; then declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') fi From ab228ecda104d7ee5cb3056303a1a54cde605639 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 02:48:44 -0800 Subject: [PATCH 8/9] Also update cron.sh --- vllm-benchmarks/cron.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-benchmarks/cron.sh b/vllm-benchmarks/cron.sh index ec85266..08ec2e7 100755 --- a/vllm-benchmarks/cron.sh +++ b/vllm-benchmarks/cron.sh @@ -66,7 +66,7 @@ run_benchmarks() { } if command -v nvidia-smi; then - declare -g GPU_DEVICE=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}') + declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}') elif command -v amd-smi; then declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') fi From b66d76ef3d20c22f8549dcfcda6933be891f7959 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 10 Mar 2025 16:45:14 -0700 Subject: [PATCH 9/9] Use the correct requirements from vllm Due to https://github.com/vllm-project/vllm/pull/12547 --- vllm-benchmarks/run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 3593779..6dd2980 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -41,11 +41,11 @@ build_vllm() { SCCACHE_CACHE_SIZE=100G sccache --start-server || true # Build and install vLLM if command -v nvidia-smi; then - pip install -r requirements-build.txt + pip install -r requirements/build.txt pip install --editable . elif command -v amd-smi; then - pip install -r requirements-rocm.txt - pip install -r requirements-rocm-build.txt + pip install -r requirements/rocm.txt + pip install -r requirements/rocm-build.txt # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop fi