# NOTE(review): the following header lines were GitHub web-page chrome captured
# in the copy/paste ("Skip to content", run title, "Workflow file for this run").
# They are not part of the workflow; kept here as comments so the file parses.
#
# Run vLLM tests — workflow file for run #109
name: Run vLLM tests

# Triggers:
#   - schedule: periodic post-merge validation of vLLM against PyTorch
#   - workflow_dispatch: manual runs against an arbitrary vLLM branch/commit
#   - pull_request: self-test when this workflow or its driver script changes
on:
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh

# One in-flight run per (workflow, PR-or-SHA, event-kind); newer runs cancel older ones.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  test:
    name: Run vLLM tests
    strategy:
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # vLLM is checked out into ./vllm with full history so the image-lookup
      # step below can walk back through recent commits.
      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      # Choose the CI image registry/suffix based on branch and device,
      # exporting DOCKER_IMAGE_PREFIX / DOCKER_IMAGE_SUFFIX for later steps.
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux
          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi
          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi
          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV

      # Resolve HEAD_SHA to a commit that already has a published CI image.
      # If no commit was supplied, walk back from HEAD until an image exists.
      - name: Check for available Docker image
        working-directory: vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux
          if [[ -z "${HEAD_SHA}" ]]; then
            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there, if it doesn't then check an older one
              # because the commit is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
              # No Docker image available yet because the commit is too recent
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi
          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV

      - name: Setup CUDA GPU_FLAG for docker run
        if: matrix.device-name == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      # Start the resolved CI image detached, mount the workspace, and run the
      # test driver script inside the container.
      - name: Run vLLM tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh"