vLLM Benchmark #97

Workflow file for this run

.github/workflows/vllm-benchmark.yml at 6d7b17a

	name: vLLM Benchmark

	on:
	workflow_dispatch:
	inputs:
	pytorch_branch:
	description: \|
	PyTorch branch (main or refs/pull/PR_NUMBER/head for pull request)
	required: true
	type: string
	default: main
	pytorch_commit:
	description: \|
	PyTorch commit (optional, default to use the latest commit from the branch)
	required: false
	type: string
	models:
	description: \|
	A comma-separated list of models from pytorch-integration-testing repo (optional, default to run everything)
	required: false
	type: string
	default: ''
	runners:
	description: \|
	A comma-separated list of runners from .github/scripts/generate_vllm_benchmark_matrix.py to run the benchmark (optional, default to run everything)
	required: true
	type: string
	default: h100,b200
	schedule:
	# Run daily at 5:15 AM PST
	- cron: '15 13 * * *'

	concurrency:
	group: ${{ github.workflow }}-${{ github.event.pull_request.number \|\| github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	cancel-in-progress: true

	jobs:
	set-parameters:
	if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
	runs-on: linux.c7i.2xlarge
	outputs:
	benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
	docker_image: ${{ steps.calculate-docker-image.outputs.docker-image }}
	torch_cuda_arch_list: '8.0 8.9 9.0 10.0'
	build_environment: linux-jammy-cuda12.9-py3.12-gcc11
	steps:
	- uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
	with:
	python-version: 3.12
	activate-environment: true
	ignore-empty-workdir: true

	- name: Checkout pytorch-integration-testing repository
	uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	with:
	repository: pytorch/pytorch-integration-testing
	path: pytorch/pytorch-integration-testing
	ref: main

	- name: Set parameters
	working-directory: pytorch/pytorch-integration-testing
	id: set-parameters
	env:
	MODELS: ${{ inputs.models \|\| '' }}
	# Only need CUDA for now, we can add ROCm later if needed
	RUNNERS: ${{ inputs.runners \|\| 'h100,b200' }}
	run: \|
	set -eux

	# The generated matrix is grouped by model and runner
	python .github/scripts/generate_vllm_benchmark_matrix.py \
	--benchmark-configs-dir vllm-benchmarks/benchmarks \
	--models "${MODELS}" \
	--runners "${RUNNERS}"

	- name: Checkout PyTorch
	uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	with:
	path: pytorch/pytorch
	ref: ${{ inputs.pytorch_commit \|\| inputs.pytorch_branch }}
	show-progress: false

	- name: Calculate docker image
	id: calculate-docker-image
	uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
	with:
	working-directory: pytorch/pytorch
	docker-image-name: ci-image:pytorch-linux-jammy-cuda12.9-cudnn9-py3.12-gcc11-vllm

	build:
	name: Build PyTorch and vLLM
	needs:
	- set-parameters
	uses: ./.github/workflows/_vllm-build.yml
	with:
	runner: linux.24xlarge.memory
	docker_image: ${{ needs.set-parameters.outputs.docker_image }}
	build_environment: ${{ needs.set-parameters.outputs.build_environment }}
	pytorch_branch: ${{ inputs.pytorch_branch }}
	pytorch_commit: ${{ inputs.pytorch_commit }}
	torch_cuda_arch_list: ${{ needs.set-parameters.outputs.torch_cuda_arch_list }}
	secrets: inherit

	benchmarks:
	name: Run vLLM benchmarks
	needs:
	- set-parameters
	- build
	strategy:
	matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
	fail-fast: false
	uses: ./.github/workflows/_vllm-benchmark.yml
	permissions:
	id-token: write
	contents: read
	with:
	runner: ${{ matrix.runner }}
	docker_image: ${{ needs.set-parameters.outputs.docker_image }}
	build_environment: ${{ needs.set-parameters.outputs.build_environment }}
	pytorch_branch: ${{ inputs.pytorch_branch }}
	pytorch_commit: ${{ inputs.pytorch_commit }}
	models: ${{ matrix.models }}
	secrets: inherit

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

vLLM Benchmark #97

Workflow file

vLLM Benchmark #97

Uh oh!

Workflow file for this run