Commit

Merge branch 'main' into deepauto/feat/update-upstream
daniel-geon-park committed Feb 5, 2025
2 parents 5ac2d5f + de55333 commit 883a547
Showing 124 changed files with 9,195 additions and 1,201 deletions.
1 change: 1 addition & 0 deletions .clang-format-ignore
@@ -0,0 +1 @@
+ sgl-kernel/3rdparty/tensorrt_llm/*
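The new ignore entry keeps the vendored TensorRT-LLM sources under sgl-kernel/3rdparty out of formatting checks. A minimal sketch of the effect, assuming a clang-format release (18 or newer) that honors .clang-format-ignore and using illustrative file paths:

```bash
# Sketch only: file paths are illustrative, not taken from the repository.
# With "sgl-kernel/3rdparty/tensorrt_llm/*" listed in .clang-format-ignore,
# clang-format 18+ skips matching files and keeps checking first-party code.
clang-format --dry-run --Werror sgl-kernel/3rdparty/tensorrt_llm/foo.cu   # skipped via the ignore file
clang-format --dry-run --Werror sgl-kernel/csrc/bar.cu                    # still checked for formatting
```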
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
@@ -13,4 +13,4 @@
- [ ] Format your code according to the [Code Formatting with Pre-Commit](https://docs.sglang.ai/references/contribution_guide.html#code-formatting-with-pre-commit).
- [ ] Add unit tests as outlined in the [Running Unit Tests](https://docs.sglang.ai/references/contribution_guide.html#running-unit-tests-adding-to-ci).
- [ ] Update documentation / docstrings / example tutorials as needed, according to [Writing Documentation](https://docs.sglang.ai/references/contribution_guide.html#writing-documentation-running-docs-ci).
- - [ ] Provide throughput / latency benchmark results and accuracy evaluation results as needed, according to [Benchmark and Profiling](https://docs.sglang.ai/references/benchmark_and_profiling.html).
+ - [ ] Provide throughput / latency benchmark results and accuracy evaluation results as needed, according to [Benchmark and Profiling](https://docs.sglang.ai/references/benchmark_and_profiling.html) and [Accuracy Results](https://docs.sglang.ai/references/accuracy_evaluation.html).
16 changes: 8 additions & 8 deletions .github/workflows/pr-test.yml
@@ -37,7 +37,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -60,7 +60,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -84,7 +84,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -121,7 +121,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -165,7 +165,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -196,7 +196,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -234,7 +234,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
@@ -258,7 +258,7 @@ jobs:

- name: Install dependencies
env:
- FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
+ FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer' }}
run: |
bash scripts/ci_install_dependency.sh
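All of the test jobs above select the FlashInfer wheel index through the same expression; this change only bumps the wheel path from torch2.4 to torch2.5 while keeping the nightly/release split. A plain-shell sketch of the selection the expression encodes (VERSION stands in for the workflow's inputs.version, and the install script is assumed to read FLASHINFER_REPO from its environment):

```bash
#!/usr/bin/env bash
# Sketch of the nightly-vs-release index selection performed by the workflow expression.
VERSION="${VERSION:-release}"   # stands in for ${{ inputs.version }}
if [ "$VERSION" = "nightly" ]; then
  FLASHINFER_REPO="https://flashinfer.ai/whl/nightly/cu124/torch2.5/flashinfer"
else
  FLASHINFER_REPO="https://flashinfer.ai/whl/cu124/torch2.5/flashinfer"
fi
export FLASHINFER_REPO
bash scripts/ci_install_dependency.sh   # the workflow exports FLASHINFER_REPO to this step
```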
6 changes: 3 additions & 3 deletions .github/workflows/release-docker-amd.yml
@@ -14,7 +14,7 @@ jobs:
environment: 'prod'
strategy:
matrix:
- rocm_version: ['6.2.0']
+ rocm_version: ['6.3.0']
build_type: ['all', 'srt']
steps:
- name: Checkout repository
@@ -41,8 +41,8 @@ jobs:
run: |
version=$(cat python/sglang/version.py | cut -d'"' -f2)
if [ "${{ matrix.rocm_version }}" = "6.2.0" ]; then
rocm_tag="rocm620"
if [ "${{ matrix.rocm_version }}" = "6.3.0" ]; then
rocm_tag="rocm630"
else
echo "Unsupported ROCm version"
exit 1
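The AMD release workflow builds from a ROCm version matrix and maps each version to an image tag suffix; this change moves both the matrix entry and the mapping from 6.2.0/rocm620 to 6.3.0/rocm630. A self-contained sketch of the mapping after the change (a plain variable stands in for matrix.rocm_version):

```bash
#!/usr/bin/env bash
# Sketch of the ROCm version-to-tag mapping after the 6.3.0 bump.
rocm_version="6.3.0"   # stands in for ${{ matrix.rocm_version }}
if [ "$rocm_version" = "6.3.0" ]; then
  rocm_tag="rocm630"
else
  echo "Unsupported ROCm version"
  exit 1
fi
echo "image tag suffix: ${rocm_tag}"   # presumably appended to the Docker tag, as in the CUDA workflow
```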
6 changes: 4 additions & 2 deletions .github/workflows/release-docker.yml
@@ -14,7 +14,7 @@ jobs:
environment: 'prod'
strategy:
matrix:
- cuda_version: ['11.8.0', '12.1.1', '12.4.1']
+ cuda_version: ['11.8.0', '12.1.1', '12.4.1', '12.5.1']
build_type: ['all', 'srt']
steps:
- name: Delete huge unnecessary tools folder
@@ -39,6 +39,8 @@ jobs:
cuda_tag="cu121"
elif [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then
cuda_tag="cu124"
elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
cuda_tag="cu125"
else
echo "Unsupported CUDA version"
exit 1
@@ -58,7 +60,7 @@ jobs:
docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
docker push lmsysorg/sglang:${tag}${tag_suffix}
if [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then
if [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
docker push lmsysorg/sglang:latest${tag_suffix}
fi
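The CUDA release workflow gains a 12.5.1 entry in the build matrix, a matching cu125 tag, and moves the :latest promotion from the 12.4.1 image to the 12.5.1 image. A consolidated sketch of the resulting logic (the 11.8.0 branch lies outside the shown hunks, so its cu118 tag is an assumption; other names mirror the workflow):

```bash
#!/usr/bin/env bash
# Sketch of the CUDA version-to-tag mapping and :latest promotion after this change.
cuda_version="12.5.1"   # stands in for ${{ matrix.cuda_version }}
if [ "$cuda_version" = "11.8.0" ]; then
  cuda_tag="cu118"      # assumed; this branch is not visible in the hunks above
elif [ "$cuda_version" = "12.1.1" ]; then
  cuda_tag="cu121"
elif [ "$cuda_version" = "12.4.1" ]; then
  cuda_tag="cu124"
elif [ "$cuda_version" = "12.5.1" ]; then
  cuda_tag="cu125"
else
  echo "Unsupported CUDA version"
  exit 1
fi
echo "image tag suffix: ${cuda_tag}"

# Only the newest CUDA build is also published as :latest, now 12.5.1 instead of 12.4.1.
if [ "$cuda_version" = "12.5.1" ]; then
  echo "would also tag and push lmsysorg/sglang:latest"
fi
```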
6 changes: 5 additions & 1 deletion README.md
@@ -101,7 +101,11 @@ Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)

## Adoption and Sponsorship
- The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
+ The project is supported by (alphabetically): AMD, Atlas Cloud, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS CORP, Meituan, Nebius, Novita AI, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.

+ ## Contact Us
+
+ For enterprises interested in adopting or deploying SGLang at scale, including technical consulting, sponsorship opportunities, or partnership inquiries, please contact us at [email protected].

## Acknowledgment and Citation
We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql). Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.