@@ -29,7 +29,7 @@ concurrency:
29
29
jobs :
30
30
31
31
unit-test-frontend :
32
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
32
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
33
33
runs-on : 1-gpu-runner
34
34
steps :
35
35
- name : Checkout code
@@ -48,11 +48,12 @@ jobs:
48
48
python3 run_suite.py --suite per-commit
49
49
50
50
unit-test-backend-1-gpu :
51
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
51
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
52
52
runs-on : 1-gpu-runner
53
53
strategy :
54
+ fail-fast : false
54
55
matrix :
55
- range : [0-6, 6-16, 16-23, 23-30, 30-38, 38 -100]
56
+ range : [0-6, 6-15, 15-22, 22-32, 32-40, 40 -100]
56
57
steps :
57
58
- name : Checkout code
58
59
uses : actions/checkout@v3
75
76
76
77
77
78
unit-test-backend-2-gpu :
78
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
79
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
79
80
runs-on : 2-gpu-runner
80
81
steps :
81
82
- name : Checkout code
@@ -112,7 +113,7 @@ jobs:
112
113
python3 test_moe_ep.py
113
114
114
115
performance-test-1-gpu-part-1 :
115
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
116
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
116
117
runs-on : 1-gpu-runner
117
118
steps :
118
119
- name : Checkout code
@@ -128,7 +129,7 @@ jobs:
128
129
timeout-minutes : 10
129
130
run : |
130
131
cd test/srt
131
- python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_default
132
+ python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1
132
133
133
134
- name : Benchmark online latency
134
135
timeout-minutes : 10
@@ -148,8 +149,15 @@ jobs:
148
149
cd test/srt
149
150
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
150
151
152
+ - name : Benchmark online latency (EAGLE)
153
+ timeout-minutes : 10
154
+ run : |
155
+ cd test/srt
156
+ python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
157
+
158
+
151
159
performance-test-1-gpu-part-2 :
152
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
160
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
153
161
runs-on : 1-gpu-runner
154
162
steps :
155
163
- name : Checkout code
@@ -180,7 +188,7 @@ jobs:
180
188
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
181
189
182
190
performance-test-2-gpu :
183
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
191
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
184
192
runs-on : 2-gpu-runner
185
193
steps :
186
194
- name : Checkout code
@@ -196,7 +204,13 @@ jobs:
196
204
timeout-minutes : 10
197
205
run : |
198
206
cd test/srt
199
- python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_default
207
+ python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
208
+
209
+ - name : Benchmark single latency + torch.compile (TP=2)
210
+ timeout-minutes : 10
211
+ run : |
212
+ cd test/srt
213
+ python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
200
214
201
215
- name : Benchmark offline throughput (TP=2)
202
216
timeout-minutes : 10
@@ -210,8 +224,9 @@ jobs:
210
224
cd test/srt
211
225
python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
212
226
227
+
213
228
accuracy-test-1-gpu :
214
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
229
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
215
230
runs-on : 1-gpu-runner
216
231
steps :
217
232
- name : Checkout code
@@ -235,7 +250,7 @@ jobs:
235
250
236
251
237
252
accuracy-test-2-gpu :
238
- if : github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
253
+ if : ( github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false
239
254
runs-on : 2-gpu-runner
240
255
steps :
241
256
- name : Checkout code
0 commit comments