Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sky/catalog/gcp_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,13 @@ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
_image_df = common.read_catalog('gcp/images.csv',
pull_frequency_hours=0)
image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
    # TODO: Remove these debug lines after the catalog PR is merged.
if tag == 'skypilot:custom-cpu-ubuntu-2204-250923':
image_id = ('projects/sky-dev-465/global/images/'
'skypilot-gcp-cpu-ubuntu-250923')
elif tag == 'skypilot:custom-gpu-ubuntu-2204-250923':
image_id = ('projects/sky-dev-465/global/images/'
'skypilot-gcp-gpu-ubuntu-250923')
return image_id


Expand Down
11 changes: 11 additions & 0 deletions sky/catalog/images/provisioners/cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ else
ARCH_PATH="x86_64"
fi

# Install GCC 12 and set as default compiler
# This is required because newer Ubuntu 22.04 kernels (6.5.0+ and 6.8.0+) are built with GCC 12,
# but Ubuntu 22.04 LTS defaults to GCC 11. Without GCC 12, NVIDIA DKMS driver compilation
# will fail with error: "unrecognized command-line option '-ftrivial-auto-var-init=zero'"
# This flag was introduced in GCC 12 and is not recognized by GCC 11.
echo "Installing GCC 12 to match kernel compiler version..."
sudo apt-get update
sudo apt-get install -y gcc-12 g++-12
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100

# Download architecture-specific CUDA keyring package
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH_PATH}/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
Expand Down
4 changes: 2 additions & 2 deletions sky/clouds/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@
)

# Image ID tags
_DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-2204'
_DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-2204-250923'
# For GPU-related package version, see sky/clouds/catalog/images/provisioners/cuda.sh
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-2204'
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-2204-250923'
_DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-debian-10'
# Use COS image with GPU Direct support.
# Need to contact GCP support to build our own image for GPUDirect-TCPX support.
Expand Down
1 change: 0 additions & 1 deletion tests/smoke_tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import os
import pathlib
import subprocess
import sys
import tempfile
import textwrap
import time
Expand Down
135 changes: 135 additions & 0 deletions tests/smoke_tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Smoke tests for SkyPilot example workloads (LLM serving)
# Default options are set in pyproject.toml
# Example usage:
# Run all tests except for AWS and Lambda Cloud
# > pytest tests/smoke_tests/test_examples.py
#
# Terminate failed clusters after test finishes
# > pytest tests/smoke_tests/test_examples.py --terminate-on-failure
#
# Re-run last failed tests
# > pytest --lf
#
# Run one of the smoke tests
# > pytest tests/smoke_tests/test_examples.py::test_deepseek_r1_vllm
#
# Only run test for AWS + generic tests
# > pytest tests/smoke_tests/test_examples.py --aws
#
# Change cloud for generic tests to aws
# > pytest tests/smoke_tests/test_examples.py --generic-cloud aws

import json

import pytest
from smoke_tests import smoke_tests_utils
# TODO(zeping): move them to smoke_tests_utils
from smoke_tests.test_sky_serve import SERVE_ENDPOINT_WAIT
from smoke_tests.test_sky_serve import SERVE_WAIT_UNTIL_READY
from smoke_tests.test_sky_serve import TEARDOWN_SERVICE

import sky


@pytest.mark.gcp
@pytest.mark.parametrize('model_name,gpu_spec', [
('deepseek-ai/DeepSeek-R1-Distill-Llama-8B', 'L4:1'),
('deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'A100-80GB:2'),
])
def test_deepseek_r1_vllm(generic_cloud: str, model_name: str, gpu_spec: str):
name = smoke_tests_utils.get_cluster_name()

payload = {
"model": model_name,
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Who are you?"
},
],
}
json_payload = json.dumps(payload)

test = smoke_tests_utils.Test(
'deepseek_r1_vllm',
[
f'sky launch -y -d -c {name} --infra {generic_cloud} --env MODEL_NAME={model_name} --gpus {gpu_spec} llm/deepseek-r1-distilled/deepseek-r1-vllm.yaml',
smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
cluster_name=name,
cluster_status=[sky.ClusterStatus.UP],
timeout=300),
# Disable SKYPILOT_DEBUG while retrieving the IP to avoid debug logs
# contaminating the output of `sky status --ip`, which would break curl.
# Use `tail -n 1` to ensure only the pure IP/hostname is captured.
(
f'ORIGIN_SKYPILOT_DEBUG=$SKYPILOT_DEBUG; export SKYPILOT_DEBUG=0; '
f'ENDPOINT=$(sky status --ip {name} | tail -n 1); '
f'export SKYPILOT_DEBUG=$ORIGIN_SKYPILOT_DEBUG; '
# Wait up to 10 minutes for the model server to be ready
f'start_time=$SECONDS; timeout=1800; s=""; '
f'while true; do '
f' resp=$(curl -sS --max-time 15 http://$ENDPOINT:8000/v1/chat/completions '
f' -H "Content-Type: application/json" -d \'{json_payload}\' || true); '
f' if echo "$resp" | jq -e ".choices[0].message.content" > /dev/null 2>&1; then '
f' s="$resp"; break; fi; '
f' if (( SECONDS - start_time > timeout )); then '
f' echo "Timeout after $timeout seconds waiting for model server readiness"; echo "$resp"; exit 1; fi; '
f' echo "Waiting for model server to be ready..."; sleep 10; '
f'done; '
f'echo "$s" | jq .; '
f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
f'echo "$content"; '
# Accept either opening or closing think tag, or explicit self-identification
f'(echo "$content" | grep -qi "<think>" || '
f' echo "$content" | grep -qi "</think>" || '
f' echo "$content" | grep -qi "I\'m DeepSeek-R1") || '
f'(echo "Expected <think> tag or model self-identification not found" && exit 1)'
),
],
f'sky down -y {name}',
)
smoke_tests_utils.run_one_test(test)


@pytest.mark.gcp
def test_sglang_llava_serving(generic_cloud: str):
name = smoke_tests_utils.get_cluster_name()

payload = {
"model": "liuhaotian/llava-v1.6-vicuna-7b",
"messages": [{
"role": "user",
"content": [{
"type": "text",
"text": "Describe this image"
}, {
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/examples/frontend_language/quick_start/images/cat.jpeg"
}
}]
}],
}
json_payload = json.dumps(payload)

test = smoke_tests_utils.Test(
'sglang_llava',
[
f'sky serve up -n {name} --infra {generic_cloud} --gpus L4:1 -y llm/sglang/llava.yaml',
SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=2),
(f'{SERVE_ENDPOINT_WAIT.format(name=name)}; '
f's=$(curl -sS $endpoint/v1/chat/completions -H "Content-Type: application/json" -d \'{json_payload}\'); '
f'echo "$s" | jq .; '
f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
f'echo "$content"; '
f'echo "$content" | grep -E ".+"'),
],
TEARDOWN_SERVICE.format(name=name),
env=smoke_tests_utils.LOW_CONTROLLER_RESOURCE_ENV,
timeout=40 * 60,
)
smoke_tests_utils.run_one_test(test)
Loading