From 55368b34f5e644cc931d044baa64473b011cc6cc Mon Sep 17 00:00:00 2001 From: Orion Reblitz-Richardson Date: Sun, 7 Dec 2025 16:38:48 -0800 Subject: [PATCH] [PT nightlies] Remove nightly_torch Docker image and build. Use standard image. Signed-off-by: Orion Reblitz-Richardson --- buildkite/test-template-amd.j2 | 2 -- buildkite/test-template-ci.j2 | 20 +++++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/buildkite/test-template-amd.j2 b/buildkite/test-template-amd.j2 index 5f68479b..c038fec5 100644 --- a/buildkite/test-template-amd.j2 +++ b/buildkite/test-template-amd.j2 @@ -1,12 +1,10 @@ {% set cov_enabled = (cov_enabled == "1") %} {% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} -{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-torch-nightly" %} {% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cu118" %} {% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cpu" %} {% if branch == "main" %} {% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %} {% set docker_image_latest = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest" %} -{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-torch-nightly" %} {% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %} {% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %} {% endif %} diff --git a/buildkite/test-template-ci.j2 b/buildkite/test-template-ci.j2 index d1fc3c98..8f98ec3b 100644 --- a/buildkite/test-template-ci.j2 +++ b/buildkite/test-template-ci.j2 @@ -484,7 +484,7 @@ steps: {% endif %} {% endfor %} - - group: "vllm against torch nightly" + - group: "vLLM Against PyTorch Nightly" depends_on: ~ steps: {% if nightly != "1" %} @@ -506,9 +506,13 @@ steps: {% else %} queue: cpu_queue_premerge_us_east_1 {% endif %} - timeout_in_minutes: 360 + timeout_in_minutes: 600 commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com" + - "docker buildx create --name vllm-builder --driver docker-container --use" + - "docker buildx inspect --bootstrap" + - "docker buildx ls" - | #!/bin/bash if [[ -z $(docker manifest inspect {{ docker_image_torch_nightly }}) ]]; then @@ -518,15 +522,21 @@ steps: exit 0 fi - > - docker build - --file docker/Dockerfile.nightly_torch + docker buildx build --file docker/Dockerfile --build-arg max_jobs=16 --build-arg buildkite_commit=$BUILDKITE_COMMIT --build-arg USE_SCCACHE=1 + --build-arg PYTORCH_NIGHTLY=1 + --build-arg TORCH_CUDA_ARCH_LIST="8.0 8.9 9.0 10.0" + --build-arg FI_TORCH_CUDA_ARCH_LIST="8.0 8.9 9.0a 10.0a" + --cache-to type=registry,ref={{ cache_to }},mode=max,compression=zstd + --cache-from type=registry,ref={{ cache_from }},mode=max + --cache-from type=registry,ref={{ cache_from_base_branch }},mode=max + --cache-from type=registry,ref={{ cache_from_main }},mode=max --tag {{ docker_image_torch_nightly }} + --push --target test --progress plain . - - "docker push {{ docker_image_torch_nightly }}" env: DOCKER_BUILDKIT: "1" retry: