
Commit 5ac18e3

Authored Mar 15, 2025
Merge pull request #29 from pytorch/add-rocm-benchmark
Tweak the benchmark script to run on ROCm
2 parents dc3a901 + b66d76e commit 5ac18e3

File tree: 3 files changed, +63 -23 lines changed

vllm-benchmarks/cron.sh (+21 -9)
```diff
@@ -27,14 +27,14 @@ run() {
 
   NOT_EXIST=0
 
-  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/benchmark_results.json"
+  S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/${GPU_DEVICE}/benchmark_results.json"
   aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
   if [[ ${NOT_EXIST:-0} == "0" ]]; then
     echo "${COMMIT}" > commit
     echo "Mark ${COMMIT} as the latest commit that has been benchmarked on main"
 
-    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
+    S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
     aws s3 cp commit "s3://ossci-benchmarks/${S3_PATH}"
   fi
 }
@@ -47,18 +47,30 @@ run_benchmarks() {
 
   rm commit || true
   # Get the last green commit from S3
-  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit"
-  aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
-  LAST_GREEN_COMMIT=$(cat commit)
+  S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit"
+  aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
+
+  if [[ ${NOT_EXIST:-0} == "0" ]]; then
+    aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" .
+    LAST_GREEN_COMMIT=$(cat commit)
 
-  if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
-    echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+    if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then
+      echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked"
+    else
+      COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
+      echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    fi
   else
-    COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT})
-    echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done
+    run "${HEAD_SHA}"
   fi
 }
 
+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi
+
 while :
 do
   pull_vllm
```
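The new detection block at the bottom is what feeds `${GPU_DEVICE}` into every S3 path above; the second hunk also makes the script fall back to benchmarking `HEAD_SHA` directly when no per-device last-green-commit marker exists yet, i.e. on the first run for a new device type. A minimal sketch of the name parsing, assuming a typical `nvidia-smi` name string (exact wording varies by driver and model):

```bash
# echo stands in for nvidia-smi here so the sketch runs anywhere;
# awk '{print $2}' keeps only the second whitespace-separated field.
echo "NVIDIA H100 80GB HBM3" | awk '{print $2}'   # prints "H100"
```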

vllm-benchmarks/run.sh (+22 -9)
```diff
@@ -12,9 +12,6 @@ cleanup() {
   if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then
     rm -rf vllm/benchmarks/results
   fi
-
-  # https://github.com/vllm-project/vllm/issues/13392
-  rm -rf ~/.cache/vllm/torch_compile_cache
 }
 
 setup_vllm() {
@@ -43,8 +40,15 @@ build_vllm() {
   # TODO (huydhn) I'll setup remote cache for this later
   SCCACHE_CACHE_SIZE=100G sccache --start-server || true
   # Build and install vLLM
-  pip install -r requirements-build.txt
-  pip install --editable .
+  if command -v nvidia-smi; then
+    pip install -r requirements/build.txt
+    pip install --editable .
+  elif command -v amd-smi; then
+    pip install -r requirements/rocm.txt
+    pip install -r requirements/rocm-build.txt
+    # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm
+    PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop
+  fi
   popd
 }
 
@@ -65,19 +69,22 @@ run_benchmark() {
 upload_results() {
   if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then
     # Upload the benchmark results
-    python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results
+    python upload_benchmark_results.py \
+      --vllm vllm \
+      --benchmark-results vllm/benchmarks/results \
+      --device "${GPU_DEVICE}"
 
     pushd vllm
     if [[ -f benchmarks/results/benchmark_results.md ]]; then
       # Upload the markdown file
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.md"
       aws s3 cp --acl public-read \
         benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}"
     fi
 
     if [[ -f benchmarks.log ]]; then
       # Upload the logs
-      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log"
+      S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmarks.log"
       aws s3 cp --acl public-read \
         benchmarks.log "s3://ossci-benchmarks/${S3_PATH}"
     fi
@@ -99,7 +106,13 @@ pushd vllm
 export HEAD_BRANCH=main
 export HEAD_SHA=$(git rev-parse --verify HEAD)
 
-S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json"
+if command -v nvidia-smi; then
+  declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+elif command -v amd-smi; then
+  declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+fi
+
+S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.json"
 aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
 if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then
```
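On the ROCm path the build pins `PYTORCH_ROCM_ARCH` to gfx90a and gfx942, which correspond to the MI200 and MI300 Instinct generations, and installs via `setup.py develop` as the linked vLLM ROCm install docs describe. A hedged sketch of deriving the target from the local GPU instead of pinning it, assuming `rocminfo`'s usual gfx-target output (the grep pattern is an assumption; the pinned list stays as the fallback):

```bash
# Pick the first gfx target reported by rocminfo, falling back to the
# pinned MI200/MI300 list when nothing is detected.
ROCM_ARCH=$(rocminfo 2>/dev/null | grep -o 'gfx[0-9a-f]\+' | head -1)
PYTORCH_ROCM_ARCH="${ROCM_ARCH:-gfx90a;gfx942}" python setup.py develop
```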

vllm-benchmarks/upload_benchmark_results.py (+20 -5)
```diff
@@ -61,6 +61,12 @@ def parse_args() -> Any:
         default="ossci-benchmarks",
         help="the S3 bucket to upload the benchmark results",
     )
+    parser.add_argument(
+        "--device",
+        type=str,
+        required=True,
+        help="the name of the GPU device coming from nvidia-smi or amd-smi",
+    )
     parser.add_argument(
         "--dry-run",
         action="store_true",
@@ -94,10 +100,13 @@ def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]:
 
 
 def get_runner_info() -> Dict[str, Any]:
+    if torch.cuda.is_available() and torch.version.hip:
+        name = "rocm"
+    elif torch.cuda.is_available() and torch.version.cuda:
+        name = "cuda"
+
     return {
-        # TODO (huydhn): Figure out a better way to set the name here without
-        # hard coding it to cuda
-        "name": "cuda",
+        "name": name,
         "type": torch.cuda.get_device_name(),
         "cpu_info": platform.processor(),
         "cpu_count": psutil.cpu_count(),
```
```diff
@@ -155,9 +164,10 @@ def upload_to_s3(
     head_branch: str,
     head_sha: str,
     aggregated_results: List[Dict[str, Any]],
+    device: str,
     dry_run: bool = True,
 ) -> None:
-    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json"
+    s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/{device}/benchmark_results.json"
     info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}")
     if not dry_run:
         # Write in JSONEachRow format
@@ -184,7 +194,12 @@ def main() -> None:
     # Extract and aggregate the benchmark results
     aggregated_results = aggregate(metadata, runner, load(args.benchmark_results))
     upload_to_s3(
-        args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run
+        args.s3_bucket,
+        head_branch,
+        head_sha,
+        aggregated_results,
+        args.device,
+        args.dry_run,
     )
 
 
```
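With `--device` threaded through to `upload_to_s3()`, results for the same commit land under per-device keys instead of overwriting one another. A sketch of the resulting bucket layout, with a hypothetical SHA and device names:

```bash
# Hypothetical keys after benchmarking one commit on two device types:
#   v3/vllm-project/vllm/main/<sha>/H100/benchmark_results.json
#   v3/vllm-project/vllm/main/<sha>/MI300/benchmark_results.json
aws s3 ls --recursive "s3://ossci-benchmarks/v3/vllm-project/vllm/main/"
```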