-
Notifications
You must be signed in to change notification settings - Fork 917
Update gcp gpu image #6417
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
zpoint
wants to merge
22
commits into
skypilot-org:master
Choose a base branch
from
zpoint:dev/zeping/gcp_images
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Update gcp gpu image #6417
Changes from all commits
Commits
Show all changes
22 commits
Select commit
Hold shift + click to select a range
0a8eb0b
gcp build
zpoint d3fe9cb
Merge branch 'master' into dev/zeping/gcp_images
zpoint b3215d5
fix script
zpoint 0ce7921
test
zpoint 32c5ec9
fix failure
zpoint ce588af
specific version
zpoint 6e88ae2
Merge branch 'master' into dev/zeping/gcp_images
zpoint 5be79ad
sky test llm
zpoint d98adcf
comment
zpoint c7f6238
fix test failure
zpoint 37651ff
fix failure
zpoint bc60ca6
longer timeout
zpoint 8deb70e
longer timeout
zpoint d0836b3
fix test
zpoint 948239c
fix test
zpoint ccc9838
revert catalog debug change
zpoint 7f5173c
merge master
zpoint 3bc95e2
rename to test_examples
zpoint 9272b2d
restore cuda change
zpoint 87a5f72
resolve comment and rebuild
zpoint 53c18ef
debug
zpoint b12166c
comment
zpoint File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ | |
| import os | ||
| import pathlib | ||
| import subprocess | ||
| import sys | ||
| import tempfile | ||
| import textwrap | ||
| import time | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| # Smoke tests for SkyPilot for basic functionality | ||
| # Default options are set in pyproject.toml | ||
| # Example usage: | ||
| # Run all tests except for AWS and Lambda Cloud | ||
| # > pytest tests/smoke_tests/test_llm.py | ||
| # | ||
| # Terminate failed clusters after test finishes | ||
| # > pytest tests/smoke_tests/test_llm.py --terminate-on-failure | ||
| # | ||
| # Re-run last failed tests | ||
| # > pytest --lf | ||
| # | ||
| # Run one of the smoke tests | ||
| # > pytest tests/smoke_tests/test_llm.py::test_deepseek_r1 | ||
| # | ||
| # Only run test for AWS + generic tests | ||
| # > pytest tests/smoke_tests/test_llm.py --aws | ||
| # | ||
| # Change cloud for generic tests to aws | ||
| # > pytest tests/smoke_tests/test_llm.py --generic-cloud aws | ||
|
|
||
| import json | ||
|
|
||
| import pytest | ||
| from smoke_tests import smoke_tests_utils | ||
| # TODO(zeping): move them to smoke_tests_utils | ||
| from smoke_tests.test_sky_serve import SERVE_ENDPOINT_WAIT | ||
| from smoke_tests.test_sky_serve import SERVE_WAIT_UNTIL_READY | ||
| from smoke_tests.test_sky_serve import TEARDOWN_SERVICE | ||
|
|
||
| import sky | ||
|
|
||
|
|
||
@pytest.mark.gcp
@pytest.mark.parametrize('model_name,gpu_spec', [
    ('deepseek-ai/DeepSeek-R1-Distill-Llama-8B', 'L4:1'),
    ('deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'A100-80GB:2'),
])
def test_deepseek_r1_vllm(generic_cloud: str, model_name: str, gpu_spec: str):
    """Launch a DeepSeek-R1 distilled model with vLLM and verify inference.

    Launches the model on a fresh cluster, waits for the cluster to come UP,
    then polls the OpenAI-compatible /v1/chat/completions endpoint until the
    model server answers, and checks the reply looks like R1 output (a
    <think> tag or explicit self-identification).
    """
    name = smoke_tests_utils.get_cluster_name()

    payload = {
        "model": model_name,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": "Who are you?"
            },
        ],
    }
    json_payload = json.dumps(payload)

    test = smoke_tests_utils.Test(
        'deepseek_r1_vllm',
        [
            f'sky launch -y -d -c {name} --infra {generic_cloud} --env MODEL_NAME={model_name} --gpus {gpu_spec} llm/deepseek-r1-distilled/deepseek-r1-vllm.yaml',
            smoke_tests_utils.get_cmd_wait_until_cluster_status_contains(
                cluster_name=name,
                cluster_status=[sky.ClusterStatus.UP],
                timeout=300),
            # Disable SKYPILOT_DEBUG while retrieving the IP to avoid debug logs
            # contaminating the output of `sky status --ip`, which would break curl.
            # Use `tail -n 1` to ensure only the pure IP/hostname is captured.
            (
                f'ORIGIN_SKYPILOT_DEBUG=$SKYPILOT_DEBUG; export SKYPILOT_DEBUG=0; '
                f'ENDPOINT=$(sky status --ip {name} | tail -n 1); '
                f'export SKYPILOT_DEBUG=$ORIGIN_SKYPILOT_DEBUG; '
                # Wait up to 30 minutes (timeout=1800 s) for the model server
                # to be ready; large checkpoints can take a long time to load.
                f'start_time=$SECONDS; timeout=1800; s=""; '
                f'while true; do '
                f'  resp=$(curl -sS --max-time 15 http://$ENDPOINT:8000/v1/chat/completions '
                f'  -H "Content-Type: application/json" -d \'{json_payload}\' || true); '
                f'  if echo "$resp" | jq -e ".choices[0].message.content" > /dev/null 2>&1; then '
                f'  s="$resp"; break; fi; '
                f'  if (( SECONDS - start_time > timeout )); then '
                f'  echo "Timeout after $timeout seconds waiting for model server readiness"; echo "$resp"; exit 1; fi; '
                f'  echo "Waiting for model server to be ready..."; sleep 10; '
                f'done; '
                f'echo "$s" | jq .; '
                f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
                f'echo "$content"; '
                # Accept either opening or closing think tag, or explicit self-identification
                f'(echo "$content" | grep -qi "<think>" || '
                f' echo "$content" | grep -qi "</think>" || '
                f' echo "$content" | grep -qi "I\'m DeepSeek-R1") || '
                f'(echo "Expected <think> tag or model self-identification not found" && exit 1)'
            ),
        ],
        f'sky down -y {name}',
    )
    smoke_tests_utils.run_one_test(test)
|
|
||
|
|
||
@pytest.mark.gcp
def test_sglang_llava_serving(generic_cloud: str):
    """Serve LLaVA with SGLang via `sky serve` and verify a multimodal reply.

    Brings the service up with two replicas, waits until it is READY, sends a
    chat-completions request containing an image URL, and asserts the reply
    has non-empty content. The service is torn down afterwards.
    """
    name = smoke_tests_utils.get_cluster_name()

    # Multimodal request: one user message carrying text plus an image URL.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/examples/frontend_language/quick_start/images/cat.jpeg"
        }
    }
    text_part = {"type": "text", "text": "Describe this image"}
    request_body = {
        "model": "liuhaotian/llava-v1.6-vicuna-7b",
        "messages": [{
            "role": "user",
            "content": [text_part, image_part]
        }],
    }
    json_payload = json.dumps(request_body)

    commands = [
        f'sky serve up -n {name} --infra {generic_cloud} --gpus L4:1 -y llm/sglang/llava.yaml',
        SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=2),
        (f'{SERVE_ENDPOINT_WAIT.format(name=name)}; '
         f's=$(curl -sS $endpoint/v1/chat/completions -H "Content-Type: application/json" -d \'{json_payload}\'); '
         f'echo "$s" | jq .; '
         f'content=$(echo "$s" | jq -r ".choices[0].message.content"); '
         f'echo "$content"; '
         # grep -E ".+" fails (non-zero exit) when the content is empty.
         f'echo "$content" | grep -E ".+"'),
    ]
    test = smoke_tests_utils.Test(
        'sglang_llava',
        commands,
        TEARDOWN_SERVICE.format(name=name),
        env=smoke_tests_utils.LOW_CONTROLLER_RESOURCE_ENV,
        timeout=40 * 60,
    )
    smoke_tests_utils.run_one_test(test)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.