Skip to content

Commit

Permalink
Merge pull request #4 from DeepAuto-AI/deepauto/feat/update_main
Browse files Browse the repository at this point in the history
Merge upstream commits
  • Loading branch information
kbumsik authored Jan 28, 2025
2 parents 0005b7e + 1849c48 commit f5f5e89
Show file tree
Hide file tree
Showing 225 changed files with 12,878 additions and 3,685 deletions.
35 changes: 35 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Dev-container image built on top of lmsysorg/sglang:dev.
# Adds a non-root `devuser` with passwordless sudo, copies root's shell/editor
# configuration to that user, and installs uv and rustup under the user account.
FROM lmsysorg/sglang:dev

# Create non-root user with specified UID and GID
# NOTE: Replace with your own UID and GID (the defaults below can be overridden as build args).
# This is a workaround from https://github.com/microsoft/vscode-remote-release/issues/49#issuecomment-489060908.
ARG HOST_UID=1003
ARG HOST_GID=1003
RUN groupadd -g "$HOST_GID" devuser && \
    useradd -m -u "$HOST_UID" -g "$HOST_GID" -s /bin/zsh devuser

# Give devuser sudo access
# The drop-in is set to 0440, the mode recommended for sudoers files, and the
# apt caches are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends sudo && \
    echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser && \
    chmod 0440 /etc/sudoers.d/devuser && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Set up oh-my-zsh for devuser
# Copies root's existing configuration and rewrites the oh-my-zsh path inside
# .zshrc so it points at the copy in devuser's home directory.
RUN cp -r /root/.oh-my-zsh /home/devuser/.oh-my-zsh && \
    cp /root/.zshrc /home/devuser/.zshrc && \
    cp /root/.vimrc /home/devuser/.vimrc && \
    cp /root/.tmux.conf /home/devuser/.tmux.conf && \
    sed -i 's|/root/.oh-my-zsh|/home/devuser/.oh-my-zsh|g' /home/devuser/.zshrc && \
    chown -R devuser:devuser /home/devuser/

# Set workspace directory and ownership
WORKDIR /sgl-workspace/sglang
RUN chown -R devuser:devuser /sgl-workspace

# Switch to devuser
USER devuser

# Make `curl ... | sh` fail the build when the download fails; without pipefail
# the pipeline's status is that of `sh`, which exits 0 on empty input.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Install rust
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
10 changes: 7 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
{
"name": "sglang",
"build": {
"dockerfile": "../docker/Dockerfile.dev"
"dockerfile": "Dockerfile"
},
"remoteUser": "devuser",
"customizations": {
"vscode": {
"extensions": [
Expand All @@ -15,6 +16,9 @@
]
}
},
"workspaceFolder": "/sgl-workspace/sglang",
"forwardPorts": []
"forwardPorts": [],
"runArgs": [
"--gpus",
"all"
]
}
2 changes: 1 addition & 1 deletion .github/workflows/execute-notebook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
python -m ipykernel install --user --name python3 --display-name "Python 3"
- name: Execute notebooks
timeout-minutes: 30
timeout-minutes: 40
run: |
cd docs
make clean
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr-test-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
cd sgl-router/
cargo test
e2e-rust:
e2e-python:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
Expand All @@ -65,7 +65,7 @@ jobs:
python3 run_suite.py
finish:
needs: [unit-test-rust, e2e-rust]
needs: [unit-test-rust, e2e-python]
runs-on: ubuntu-latest
steps:
- name: Finish
Expand Down
71 changes: 62 additions & 9 deletions .github/workflows/pr-test-sgl-kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,77 @@ concurrency:
cancel-in-progress: true

jobs:
unit-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
lint:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Install dependencies
- name: Check clang-format
uses: DoozyX/clang-format-lint-action@v0.18.1
with:
source: sgl-kernel
extensions: h,c,cpp,hpp,cu,cuh,cc
clangFormatVersion: 16
style: file

build-wheels:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: sgl-kernel-build-node
strategy:
matrix:
python-version: ['3.9']
cuda-version: ['12.4']

steps:
- name: Cleanup
run: |
bash scripts/ci_install_dependency.sh
sudo rm -rf $GITHUB_WORKSPACE/* || true
- uses: actions/checkout@v4
with:
submodules: 'recursive'

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
run: |
cd sgl-kernel
git submodule update --init --recursive
pip3 install -e . --force-reinstall
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*

unit-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
needs: build-wheels
runs-on: 1-gpu-runner
steps:
- uses: actions/checkout@v4

- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*

- name: Install
run: |
pip3 install torch==2.5.1 && pip3 install pytest && pip3 install vllm==0.6.4.post1
pip3 uninstall sgl-kernel -y || true
pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
pip3 list | grep sgl-kernel
- name: Run test
timeout-minutes: 10
timeout-minutes: 30
run: |
cd sgl-kernel
find tests -name "test_*.py" | xargs -n 1 python3
Expand All @@ -43,7 +96,7 @@ jobs:
pip3 uninstall sgl-kernel -y
finish:
needs: [unit-test]
needs: [unit-test, lint]
runs-on: ubuntu-latest
steps:
- name: Finish
Expand Down
37 changes: 26 additions & 11 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ concurrency:
jobs:

unit-test-frontend:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 1-gpu-runner
steps:
- name: Checkout code
Expand All @@ -48,11 +48,12 @@ jobs:
python3 run_suite.py --suite per-commit
unit-test-backend-1-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 1-gpu-runner
strategy:
fail-fast: false
matrix:
range: [0-6, 6-16, 16-23, 23-30, 30-38, 38-100]
range: [0-6, 6-15, 15-22, 22-32, 32-40, 40-100]
steps:
- name: Checkout code
uses: actions/checkout@v3
Expand All @@ -75,7 +76,7 @@ jobs:
unit-test-backend-2-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 2-gpu-runner
steps:
- name: Checkout code
Expand Down Expand Up @@ -112,7 +113,7 @@ jobs:
python3 test_moe_ep.py
performance-test-1-gpu-part-1:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 1-gpu-runner
steps:
- name: Checkout code
Expand All @@ -128,7 +129,7 @@ jobs:
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_default
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1
- name: Benchmark online latency
timeout-minutes: 10
Expand All @@ -148,8 +149,15 @@ jobs:
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
- name: Benchmark online latency (EAGLE)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
performance-test-1-gpu-part-2:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 1-gpu-runner
steps:
- name: Checkout code
Expand Down Expand Up @@ -180,7 +188,7 @@ jobs:
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
performance-test-2-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 2-gpu-runner
steps:
- name: Checkout code
Expand All @@ -196,7 +204,13 @@ jobs:
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_default
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
- name: Benchmark single latency + torch.compile (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
- name: Benchmark offline throughput (TP=2)
timeout-minutes: 10
Expand All @@ -210,8 +224,9 @@ jobs:
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
accuracy-test-1-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 1-gpu-runner
steps:
- name: Checkout code
Expand All @@ -235,7 +250,7 @@ jobs:
accuracy-test-2-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
runs-on: 2-gpu-runner
steps:
- name: Checkout code
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/release-pypi-kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches:
- main
paths:
- sgl-kernel/pyproject.toml
- sgl-kernel/version.py
workflow_dispatch:

concurrency:
Expand All @@ -14,11 +14,12 @@ concurrency:

jobs:
build-wheels:
if: github.repository == 'sgl-project/sglang'
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
cuda-version: ['12.1']
python-version: ['3.9']
cuda-version: ['12.4']

steps:
- uses: actions/checkout@v4
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/release-pypi-router.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
branches:
- main
paths:
- sglang-router/pyproject.toml
- sgl-router/pyproject.toml
workflow_dispatch:

jobs:
Expand All @@ -26,9 +26,9 @@ jobs:
with:
path: sglang-repo

- name: Move sglang-router folder to root and delete sglang-repo
- name: Move sgl-router folder to root and delete sglang-repo
run: |
mv sglang-repo/sglang-router/* .
mv sglang-repo/sgl-router/* .
rm -rf sglang-repo
ls -alt
Expand Down Expand Up @@ -69,9 +69,9 @@ jobs:
with:
path: sglang-repo

- name: Move sglang-router folder to root, copy the license file, and delete sglang-repo
- name: Move sgl-router folder to root, copy the license file, and delete sglang-repo
run: |
mv sglang-repo/sglang-router/* .
mv sglang-repo/sgl-router/* .
mv sglang-repo/LICENSE .
rm -rf sglang-repo
ls -alt
Expand All @@ -84,6 +84,7 @@ jobs:
- name: Build SDist
run: |
pip install build
python -m pip install -U packaging
python -m build --sdist
- uses: actions/upload-artifact@v4
Expand Down
Loading

0 comments on commit f5f5e89

Please sign in to comment.