DeepAuto-AI
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
Lines changed: 35 additions & 0 deletions
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
Lines changed: 7 additions & 3 deletions
diff --git a/.github/workflows/execute-notebook.yml b/.github/workflows/execute-notebook.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml
Lines changed: 62 additions & 9 deletions
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
Lines changed: 26 additions & 11 deletions
diff --git a/.github/workflows/release-pypi-kernel.yml b/.github/workflows/release-pypi-kernel.yml
Lines changed: 4 additions & 3 deletions
diff --git a/.github/workflows/release-pypi-router.yml b/.github/workflows/release-pypi-router.yml
Lines changed: 6 additions & 5 deletions
@@ -0,0 +1,35 @@
+FROM lmsysorg/sglang:dev
+
+# Create a non-root user whose UID and GID match the host user
+# NOTE: Replace with your own UID and GID (check with `id -u` and `id -g` on the host). This is a workaround from https://github.com/microsoft/vscode-remote-release/issues/49#issuecomment-489060908.
+ARG HOST_UID=1003
+ARG HOST_GID=1003
+RUN groupadd -g $HOST_GID devuser && \
+    useradd -m -u $HOST_UID -g $HOST_GID -s /bin/zsh devuser
+
+# Give devuser passwordless sudo access
+RUN apt-get update && apt-get install -y sudo && \
+    echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser && \
+    rm -rf /var/lib/apt/lists/* && \
+    apt-get clean
+
+# Copy root's shell/editor configuration (oh-my-zsh, zsh, vim, tmux) to devuser and repoint the oh-my-zsh path
+RUN cp -r /root/.oh-my-zsh /home/devuser/.oh-my-zsh && \
+    cp /root/.zshrc /home/devuser/.zshrc && \
+    cp /root/.vimrc /home/devuser/.vimrc && \
+    cp /root/.tmux.conf /home/devuser/.tmux.conf && \
+    sed -i 's|/root/.oh-my-zsh|/home/devuser/.oh-my-zsh|g' /home/devuser/.zshrc && \
+    chown -R devuser:devuser /home/devuser/
+
+# Set workspace directory and hand ownership of the whole workspace tree to devuser
+WORKDIR /sgl-workspace/sglang
+RUN chown -R devuser:devuser /sgl-workspace
+
+# Switch to devuser; every instruction below runs as this user
+USER devuser
+
+# Install uv (as devuser)
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Install the Rust toolchain via rustup (as devuser); -y skips the confirmation prompt
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
@@ -1,8 +1,9 @@
 {
     "name": "sglang",
     "build": {
-        "dockerfile": "../docker/Dockerfile.dev"
+        "dockerfile": "Dockerfile"
     },
+    "remoteUser": "devuser",
     "customizations": {
         "vscode": {
             "extensions": [
@@ -15,6 +16,9 @@
             ]
         }
     },
-    "workspaceFolder": "/sgl-workspace/sglang",
-    "forwardPorts": []
+    "forwardPorts": [],
+    "runArgs": [
+        "--gpus",
+        "all"
+    ]
 }
@@ -42,7 +42,7 @@ jobs:
           python -m ipykernel install --user --name python3 --display-name "Python 3"
 
       - name: Execute notebooks
-        timeout-minutes: 30
+        timeout-minutes: 40
         run: |
           cd docs
           make clean
 
@@ -40,7 +40,7 @@ jobs:
           cd sgl-router/
           cargo test
 
-  e2e-rust:
+  e2e-python:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
     runs-on: 2-gpu-runner
     steps:
@@ -65,7 +65,7 @@ jobs:
           python3 run_suite.py
 
   finish:
-    needs: [unit-test-rust, e2e-rust]
+    needs: [unit-test-rust, e2e-python]
     runs-on: ubuntu-latest
     steps:
       - name: Finish
 
@@ -16,24 +16,77 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  unit-test:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 1-gpu-runner
+  lint:
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
 
-      - name: Install dependencies
+      - name: Check clang-format
+        uses: DoozyX/clang-format-lint-action@v0.18.1
+        with:
+          source: sgl-kernel
+          extensions: h,c,cpp,hpp,cu,cuh,cc
+          clangFormatVersion: 16
+          style: file
+
+  build-wheels:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: sgl-kernel-build-node
+    strategy:
+      matrix:
+        python-version: ['3.9']
+        cuda-version: ['12.4']
+
+    steps:
+      - name: Cleanup
         run: |
-          bash scripts/ci_install_dependency.sh
+          sudo rm -rf $GITHUB_WORKSPACE/* || true
+
+      - uses: actions/checkout@v4
+        with:
+          submodules: 'recursive'
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
 
+      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
+        run: |
           cd sgl-kernel
-          git submodule update --init --recursive
-          pip3 install -e . --force-reinstall
+          chmod +x ./build.sh
+          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
+          path: sgl-kernel/dist/*
+
+  unit-test:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    needs: build-wheels
+    runs-on: 1-gpu-runner
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-*
+
+      - name: Install
+        run: |
+          pip3 install torch==2.5.1 && pip3 install pytest && pip3 install vllm==0.6.4.post1
+          pip3 uninstall sgl-kernel -y || true
+          pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
           pip3 list | grep sgl-kernel
 
       - name: Run test
-        timeout-minutes: 10
+        timeout-minutes: 30
         run: |
           cd sgl-kernel
           find tests -name "test_*.py" | xargs -n 1 python3
@@ -43,7 +96,7 @@ jobs:
           pip3 uninstall sgl-kernel -y
 
   finish:
-    needs: [unit-test]
+    needs: [unit-test, lint]
     runs-on: ubuntu-latest
     steps:
       - name: Finish
 
@@ -29,7 +29,7 @@ concurrency:
 jobs:
 
   unit-test-frontend:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
@@ -48,11 +48,12 @@ jobs:
           python3 run_suite.py --suite per-commit
 
   unit-test-backend-1-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 1-gpu-runner
     strategy:
+      fail-fast: false
       matrix:
-        range: [0-6, 6-16, 16-23, 23-30, 30-38, 38-100]
+        range: [0-6, 6-15, 15-22, 22-32, 32-40, 40-100]
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -75,7 +76,7 @@ jobs:
 
 
   unit-test-backend-2-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
@@ -112,7 +113,7 @@ jobs:
           python3 test_moe_ep.py
 
   performance-test-1-gpu-part-1:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
@@ -128,7 +129,7 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_default
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1
 
       - name: Benchmark online latency
         timeout-minutes: 10
@@ -148,8 +149,15 @@ jobs:
           cd test/srt
           python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
 
+      - name: Benchmark online latency (EAGLE)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
+
+
   performance-test-1-gpu-part-2:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
@@ -180,7 +188,7 @@ jobs:
           python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
 
   performance-test-2-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
@@ -196,7 +204,13 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_default
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+
+      - name: Benchmark single latency + torch.compile (TP=2)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
 
       - name: Benchmark offline throughput (TP=2)
         timeout-minutes: 10
@@ -210,8 +224,9 @@ jobs:
           cd test/srt
           python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
 
+
   accuracy-test-1-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
@@ -235,7 +250,7 @@ jobs:
 
 
   accuracy-test-2-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
 
@@ -5,7 +5,7 @@ on:
     branches:
       - main
     paths:
-      - sgl-kernel/pyproject.toml
+      - sgl-kernel/version.py
   workflow_dispatch:
 
 concurrency:
@@ -14,11 +14,12 @@ concurrency:
 
 jobs:
   build-wheels:
+    if: github.repository == 'sgl-project/sglang'
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
-        cuda-version: ['12.1']
+        python-version: ['3.9']
+        cuda-version: ['12.4']
 
     steps:
     - uses: actions/checkout@v4
 
@@ -7,7 +7,7 @@ on:
     branches:
       - main
     paths:
-      - sglang-router/pyproject.toml
+      - sgl-router/pyproject.toml
   workflow_dispatch:
 
 jobs:
@@ -26,9 +26,9 @@ jobs:
         with:
           path: sglang-repo
 
-      - name: Move sglang-router folder to root and delete sglang-repo
+      - name: Move sgl-router folder to root and delete sglang-repo
         run: |
-          mv sglang-repo/sglang-router/* .
+          mv sglang-repo/sgl-router/* .
           rm -rf sglang-repo
           ls -alt
 
@@ -69,9 +69,9 @@ jobs:
         with:
           path: sglang-repo
 
-      - name: Move sglang-router folder to root, copy the license file, and delete sglang-repo
+      - name: Move sgl-router folder to root, copy the license file, and delete sglang-repo
         run: |
-          mv sglang-repo/sglang-router/* .
+          mv sglang-repo/sgl-router/* .
           mv sglang-repo/LICENSE .
           rm -rf sglang-repo
           ls -alt
@@ -84,6 +84,7 @@ jobs:
       - name: Build SDist
         run: |
           pip install build
+          python -m pip install -U packaging
           python -m build --sdist
 
       - uses: actions/upload-artifact@v4
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,9 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "sglang",`
`3`	`3`	`"build": {`
`4`		`- "dockerfile": "../docker/Dockerfile.dev"`
	`4`	`+ "dockerfile": "Dockerfile"`
`5`	`5`	`},`
	`6`	`+ "remoteUser": "devuser",`
`6`	`7`	`"customizations": {`
`7`	`8`	`"vscode": {`
`8`	`9`	`"extensions": [`
`@@ -15,6 +16,9 @@`
`15`	`16`	`]`
`16`	`17`	`}`
`17`	`18`	`},`
`18`		`- "workspaceFolder": "/sgl-workspace/sglang",`
`19`		`- "forwardPorts": []`
	`19`	`+ "forwardPorts": [],`
	`20`	`+ "runArgs": [`
	`21`	`+ "--gpus",`
	`22`	`+ "all"`
	`23`	`+ ]`
`20`	`24`	`}`