-
Notifications
You must be signed in to change notification settings - Fork 917
Update gcp gpu image #6417
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
zpoint
wants to merge
22
commits into
skypilot-org:master
Choose a base branch
from
zpoint:dev/zeping/gcp_images
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Update gcp gpu image #6417
Changes from all commits
Commits
Show all changes
22 commits
Select commit
Hold shift + click to select a range
0a8eb0b
gcp build
zpoint d3fe9cb
Merge branch 'master' into dev/zeping/gcp_images
zpoint b3215d5
fix script
zpoint 0ce7921
test
zpoint 32c5ec9
fix failure
zpoint ce588af
specific version
zpoint 6e88ae2
Merge branch 'master' into dev/zeping/gcp_images
zpoint 5be79ad
sky test llm
zpoint d98adcf
comment
zpoint c7f6238
fix test failure
zpoint 37651ff
fix failure
zpoint bc60ca6
longer timeout
zpoint 8deb70e
longer timeout
zpoint d0836b3
fix test
zpoint 948239c
fix test
zpoint ccc9838
revert catalog debug change
zpoint 7f5173c
merge master
zpoint 3bc95e2
rename to test_examples
zpoint 9272b2d
restore cuda change
zpoint 87a5f72
resolve comment and rebuild
zpoint 53c18ef
debug
zpoint b12166c
comment
zpoint File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ | |
| import os | ||
| import pathlib | ||
| import subprocess | ||
| import sys | ||
| import tempfile | ||
| import textwrap | ||
| import time | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| # Smoke tests for SkyPilot for basic functionality | ||
| # Default options are set in pyproject.toml | ||
| # Example usage: | ||
| # Run all tests except for AWS and Lambda Cloud | ||
| # > pytest tests/smoke_tests/test_llm.py | ||
| # | ||
| # Terminate failed clusters after test finishes | ||
| # > pytest tests/smoke_tests/test_llm.py --terminate-on-failure | ||
| # | ||
| # Re-run last failed tests | ||
| # > pytest --lf | ||
| # | ||
| # Run one of the smoke tests | ||
| # > pytest tests/smoke_tests/test_llm.py::test_deepseek_r1 | ||
| # | ||
| # Only run test for AWS + generic tests | ||
| # > pytest tests/smoke_tests/test_llm.py --aws | ||
| # | ||
| # Change cloud for generic tests to aws | ||
| # > pytest tests/smoke_tests/test_llm.py --generic-cloud aws | ||
|
|
||
| import json | ||
|
|
||
| import pytest | ||
| from smoke_tests import smoke_tests_utils | ||
| # TODO(zeping): move them to smoke_tests_utils | ||
| from smoke_tests.test_sky_serve import SERVE_ENDPOINT_WAIT | ||
| from smoke_tests.test_sky_serve import SERVE_WAIT_UNTIL_READY | ||
| from smoke_tests.test_sky_serve import TEARDOWN_SERVICE | ||
|
|
||
| import sky | ||
|
|
||
|
|
||
@pytest.mark.gcp
@pytest.mark.parametrize('model_name,gpu_spec', [
    ('deepseek-ai/DeepSeek-R1-Distill-Llama-8B', 'L4:1'),
    ('deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'A100-80GB:2'),
])
def test_deepseek_r1_vllm(generic_cloud: str, model_name: str, gpu_spec: str):
    """Launch a DeepSeek-R1 distilled model with vLLM and verify inference.

    Launches the model on a fresh cluster, waits for the cluster to come UP,
    then polls the OpenAI-compatible /v1/chat/completions endpoint until the
    model server answers, and checks the reply looks like R1 output (a
    <think> tag or explicit self-identification).
    """
    name = smoke_tests_utils.get_cluster_name()

    payload = {
        "model": model_name,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": "Who are you?"
            },
        ],
    }
    json_payload = json.dumps(payload)

    test = smoke_tests_utils.Test(
        'deepseek_r1_vllm',
        [
            f'sky launch -y -d -c {name} --infra {generic_cloud} --env MODEL_NAME={model_name} --gpus {gpu_spec} llm/deepseek-r1-distilled/deepseek-r1-vllm.yaml',
            smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
                cluster_name=name,
                cluster_status=[sky.ClusterStatus.UP],
                timeout=300),
            # Disable SKYPILOT_DEBUG while retrieving the IP to avoid debug logs
            # contaminating the output of `sky status --ip`, which would break curl.
            # Use `tail -n 1` to ensure only the pure IP/hostname is captured.
            (
                f'ORIGIN_SKYPILOT_DEBUG=$SKYPILOT_DEBUG; export SKYPILOT_DEBUG=0; '
                f'ENDPOINT=$(sky status --ip {name} | tail -n 1); '
                f'export SKYPILOT_DEBUG=$ORIGIN_SKYPILOT_DEBUG; '
                # Wait up to 30 minutes (timeout=1800 s) for the model server
                # to be ready; large checkpoints can take a long time to load.
                f'start_time=$SECONDS; timeout=1800; s=""; '
                f'while true; do '
                f'  resp=$(curl -sS --max-time 15 http://$ENDPOINT:8000/v1/chat/completions '
                f'  -H "Content-Type: application/json" -d \'{json_payload}\' || true); '
                f'  if echo "$resp" | jq -e ".choices[0].message.content" > /dev/null 2>&1; then '
                f'  s="$resp"; break; fi; '
                f'  if (( SECONDS - start_time > timeout )); then '
                f'  echo "Timeout after $timeout seconds waiting for model server readiness"; echo "$resp"; exit 1; fi; '
                f'  echo "Waiting for model server to be ready..."; sleep 10; '
                f'done; '
                f'echo "$s" | jq .; '
                f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
                f'echo "$content"; '
                # Accept either opening or closing think tag, or explicit self-identification
                f'(echo "$content" | grep -qi "<think>" || '
                f' echo "$content" | grep -qi "</think>" || '
                f' echo "$content" | grep -qi "I\'m DeepSeek-R1") || '
                f'(echo "Expected <think> tag or model self-identification not found" && exit 1)'
            ),
        ],
        f'sky down -y {name}',
    )
    smoke_tests_utils.run_one_test(test)
|
|
||
|
|
||
@pytest.mark.gcp
def test_sglang_llava_serving(generic_cloud: str):
    """Serve LLaVA with SGLang via `sky serve` and verify a multimodal reply.

    Brings the service up with two replicas, waits until it is READY, sends a
    chat-completions request containing an image URL, and asserts the reply
    has non-empty content. The service is torn down afterwards.
    """
    name = smoke_tests_utils.get_cluster_name()

    # Multimodal request: one user message carrying text plus an image URL.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/examples/frontend_language/quick_start/images/cat.jpeg"
        }
    }
    text_part = {"type": "text", "text": "Describe this image"}
    request_body = {
        "model": "liuhaotian/llava-v1.6-vicuna-7b",
        "messages": [{
            "role": "user",
            "content": [text_part, image_part]
        }],
    }
    json_payload = json.dumps(request_body)

    commands = [
        f'sky serve up -n {name} --infra {generic_cloud} --gpus L4:1 -y llm/sglang/llava.yaml',
        SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=2),
        (f'{SERVE_ENDPOINT_WAIT.format(name=name)}; '
         f's=$(curl -sS $endpoint/v1/chat/completions -H "Content-Type: application/json" -d \'{json_payload}\'); '
         f'echo "$s" | jq .; '
         f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
         f'echo "$content"; '
         # grep -E ".+" fails (non-zero exit) when the content is empty.
         f'echo "$content" | grep -E ".+"'),
    ]
    test = smoke_tests_utils.Test(
        'sglang_llava',
        commands,
        TEARDOWN_SERVICE.format(name=name),
        env=smoke_tests_utils.LOW_CONTROLLER_RESOURCE_ENV,
        timeout=40 * 60,
    )
    smoke_tests_utils.run_one_test(test)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.