Skip to content

Commit fa101c2

Browse files
committed
Bugfix: dispatch the cudagraph batch descriptor before the DP padding/sync step in `_dummy_run`, so the metadata sync uses the padded token count; re-dispatch afterwards only if the synced token count differs.
1 parent db12c1e commit fa101c2

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,9 +2062,13 @@ def _dummy_run(
20622062
if self.is_kv_producer and not self.is_kv_consumer:
20632063
with_prefill = True
20642064

2065+
has_lora = True if self.lora_config and self.compilation_config.cudagraph_specialize_lora else False
2066+
_ag_mode, batch_descriptor = \
2067+
self.cudagraph_dispatcher.dispatch(num_tokens=num_tokens, uniform_decode=uniform_decode, has_lora=has_lora)
2068+
20652069
# Padding for DP
20662070
(num_tokens, num_tokens_across_dp,
2067-
with_prefill) = self._sync_metadata_across_dp(num_tokens,
2071+
with_prefill) = self._sync_metadata_across_dp(batch_descriptor.num_tokens,
20682072
with_prefill)
20692073

20702074
# If cudagraph_mode.decode_mode() == FULL and
@@ -2112,9 +2116,9 @@ def _dummy_run(
21122116
if not is_profile and self.dynamic_eplb:
21132117
self.eplb_updator.forward_before()
21142118

2115-
has_lora = True if self.lora_config and self.compilation_config.cudagraph_specialize_lora else False
2116-
_ag_mode, batch_descriptor = \
2117-
self.cudagraph_dispatcher.dispatch(num_tokens=num_tokens, uniform_decode=uniform_decode, has_lora=has_lora)
2119+
if num_tokens != batch_descriptor.num_tokens:
2120+
_ag_mode, batch_descriptor = self.cudagraph_dispatcher.dispatch(
2121+
num_tokens=num_tokens, uniform_decode=uniform_decode, has_lora=has_lora)
21182122

21192123
num_tokens_padded = batch_descriptor.num_tokens
21202124
num_reqs_padded = (batch_descriptor.num_reqs if

0 commit comments

Comments
 (0)