# NOTE(review): the following header lines were GitHub web-page chrome captured
# in the copy/paste ("Skip to content", run title, "Workflow file for this run").
# They are not part of the workflow; kept here as comments so the file parses.
#
# Run vLLM tests — workflow file for run #109
name: Run vLLM tests

# Triggers:
#   - schedule: periodic post-merge validation of vLLM against PyTorch
#   - workflow_dispatch: manual runs against an arbitrary vLLM branch/commit
#   - pull_request: self-test when this workflow or its driver script changes
on:
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh

# One in-flight run per (workflow, PR-or-SHA, event-kind); newer runs cancel older ones.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  test:
    name: Run vLLM tests
    strategy:
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # vLLM is checked out into ./vllm with full history so the image-lookup
      # step below can walk back through recent commits.
      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      # Choose the CI image registry/suffix based on branch and device,
      # exporting DOCKER_IMAGE_PREFIX / DOCKER_IMAGE_SUFFIX for later steps.
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux
          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi
          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi
          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV

      # Resolve HEAD_SHA to a commit that already has a published CI image.
      # If no commit was supplied, walk back from HEAD until an image exists.
      - name: Check for available Docker image
        working-directory: vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux
          if [[ -z "${HEAD_SHA}" ]]; then
            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there, if it doesn't then check an older one
              # because the commit is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
              # No Docker image available yet because the commit is too recent
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi
          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV

      - name: Setup CUDA GPU_FLAG for docker run
        if: matrix.device-name == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      # Start the resolved CI image detached, mount the workspace, and run the
      # test driver script inside the container.
      - name: Run vLLM tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh"