@@ -68,34 +68,23 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test (non triton)
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
-        if: ${{ inputs.type == 'full' }}
-        run: |
-          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
-
       - name: Install Ascend toolkit & triton_ascend
         shell: bash -l {0}
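+        # bash -l starts a login shell; presumably so the container's profile scripts (Ascend/CANN env setup) are sourced before the step body runs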
         run: |
-          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20251225.run"
+          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20260105.run"
           BISHENG_URL="https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${BISHENG_NAME}"
           wget -O "${BISHENG_NAME}" "${BISHENG_URL}" && chmod a+x "${BISHENG_NAME}" && "./${BISHENG_NAME}" --install && rm "${BISHENG_NAME}"
-          source /usr/local/Ascend/8.5.0/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20251229-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
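+          # the newer toolkit ships the bishengir compiler under /usr/local/Ascend/tools, so exporting PATH here replaces the old versioned set_env.sh sourcing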
+          export PATH=/usr/local/Ascend/tools/bishengir/bin:$PATH
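+          # dev builds of triton-ascend are pulled from TestPyPI (hence the -i index override) instead of the OBS-hosted wheel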
+          python3 -m pip install -i https://test.pypi.org/simple/ triton-ascend==3.2.0.dev20260105
 
       - name: Run vllm-project/vllm-ascend test
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         if: ${{ inputs.type == 'light' }}
         run: |
-          # pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
-          # pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
-          pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_qwen_pooling_classify_correctness
+          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py::test_piecewise_res_consistency
+          pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py::test_qwen3_w8a8_quant
 
       - name: Run e2e test
         env:
@@ -105,30 +94,40 @@ jobs:
         run: |
           # We found that running the aclgraph tests in one batch causes an AclmdlRICaptureBegin error,
           # so we run each test separately.
-
-          pytest -sv --durations=0 tests/e2e/nightly/ops/triton/
-          pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
+          # basic
           pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_batch_invariant.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py
+          # xgrammar has a parameter-mismatch bug, see https://github.com/vllm-project/vllm-ascend/issues/5524
+          # pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
           # torch 2.8 doesn't work with lora, fix me
-          #pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_models.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_profile_execute_duration.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_sampler.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_xlite.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_models.py
-          pytest -sv --durations=0 tests/e2e/singlecard/pooling/
+
+          # compile
           pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
-          pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py
+
+          # model_runner_v2
+          pytest -sv --durations=0 tests/e2e/singlecard/model_runner_v2/test_basic.py
 
-          # ------------------------------------ v1 spec decode test ------------------------------------ #
-          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          # pooling
+          pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py
+          pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_embedding.py
+          pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_scoring.py
 
-          pytest -sv --durations=0 tests/e2e/singlecard/model_runner_v2/test_basic.py
+          # spec_decode
+          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py
+          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
 
   e2e-2-cards:
     name: multicard-2
@@ -189,52 +188,55 @@ jobs:
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
-          pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py
 
       - name: Install Ascend toolkit & triton_ascend
         shell: bash -l {0}
         run: |
-          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20251225.run"
+          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20260105.run"
           BISHENG_URL="https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${BISHENG_NAME}"
           wget -O "${BISHENG_NAME}" "${BISHENG_URL}" && chmod a+x "${BISHENG_NAME}" && "./${BISHENG_NAME}" --install && rm "${BISHENG_NAME}"
-          source /usr/local/Ascend/8.5.0/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20251229-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
+          export PATH=/usr/local/Ascend/tools/bishengir/bin:$PATH
+          python3 -m pip install -i https://test.pypi.org/simple/ triton-ascend==3.2.0.dev20260105
 
       - name: Run vllm-project/vllm-ascend test (light)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         if: ${{ inputs.type == 'light' }}
         run: |
-          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
 
       - name: Run vllm-project/vllm-ascend test (full)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         if: ${{ inputs.type == 'full' }}
         run: |
-          pytest -sv --durations=0 tests/e2e/multicard/test_quantization.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_full_graph_mode.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_expert_parallel.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_external_launcher.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_single_request_aclgraph.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_full_graph_mode.py
           # torch 2.8 doesn't work with lora, fix me
-          #pytest -sv --durations=0 tests/e2e/multicard/test_ilama_lora_tp2.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py
+
 
           # To avoid OOM, we run each of these tests in its own pytest process.
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_w4a8_dynamic_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_moe_sp_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2
-
-          pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_weight_load.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_w4a8_dynamic_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_sp_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2
+
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_weight_load.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_pipeline_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_prefix_caching.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_quantization.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_moe.py
+          # This test is broken, fix me
+          #pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
 
   e2e-4-cards:
     name: multicard-4
@@ -294,21 +296,26 @@ jobs:
       - name: Install Ascend toolkit & triton_ascend
         shell: bash -l {0}
         run: |
-          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20251225.run"
+          BISHENG_NAME="Ascend-BiSheng-toolkit_aarch64_20260105.run"
           BISHENG_URL="https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${BISHENG_NAME}"
           wget -O "${BISHENG_NAME}" "${BISHENG_URL}" && chmod a+x "${BISHENG_NAME}" && "./${BISHENG_NAME}" --install && rm "${BISHENG_NAME}"
-          source /usr/local/Ascend/8.5.0/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20251229-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
+          export PATH=/usr/local/Ascend/tools/bishengir/bin:$PATH
+          python3 -m pip install -i https://test.pypi.org/simple/ triton-ascend==3.2.0.dev20260105
 
       - name: Run vllm-project/vllm-ascend test for V1 Engine
         working-directory: ./vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2
-          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_kimi_k2_thinking_w4a16_tp4
-          pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel_tp2.py
-          pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_basic.py
-          pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_accuracy.py
-          pytest -sv --durations=0 tests/e2e/multicard/long_sequence/test_mtp.py
-          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_kimi_k2.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_qwen3_next.py
+
+          # long_sequence
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_basic.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
+
+          # spec_decode
+          pytest -sv --durations=0 tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py