Commit 3ce6fb2

bugfix for the order of dummy run pad and sync
Signed-off-by: LiuYi_UP <[email protected]>

Changes to be committed:
    modified: vllm_ascend/worker/model_runner_v1.py

Signed-off-by: LiuYi-UP <[email protected]>
Parent: db12c1e

1 file changed: +11 −5

vllm_ascend/worker/model_runner_v1.py

Lines changed: 11 additions & 5 deletions
@@ -2062,10 +2062,14 @@ def _dummy_run(
         if self.is_kv_producer and not self.is_kv_consumer:
             with_prefill = True
 
+        has_lora = True if self.lora_config and self.compilation_config.cudagraph_specialize_lora else False
+        _ag_mode, batch_descriptor = \
+            self.cudagraph_dispatcher.dispatch(num_tokens=num_tokens, uniform_decode=uniform_decode, has_lora=has_lora)
+
         # Padding for DP
         (num_tokens, num_tokens_across_dp,
-         with_prefill) = self._sync_metadata_across_dp(num_tokens,
-                                                        with_prefill)
+         with_prefill) = self._sync_metadata_across_dp(
+             batch_descriptor.num_tokens, with_prefill)
 
         # If cudagraph_mode.decode_mode() == FULL and
         # cudagraph_mode.seperate_routine(). This means that we are using
@@ -2112,9 +2116,11 @@ def _dummy_run(
         if not is_profile and self.dynamic_eplb:
             self.eplb_updator.forward_before()
 
-        has_lora = True if self.lora_config and self.compilation_config.cudagraph_specialize_lora else False
-        _ag_mode, batch_descriptor = \
-            self.cudagraph_dispatcher.dispatch(num_tokens=num_tokens, uniform_decode=uniform_decode, has_lora=has_lora)
+        if num_tokens != batch_descriptor.num_tokens:
+            _ag_mode, batch_descriptor = self.cudagraph_dispatcher.dispatch(
+                num_tokens=num_tokens,
+                uniform_decode=uniform_decode,
+                has_lora=has_lora)
 
         num_tokens_padded = batch_descriptor.num_tokens
         num_reqs_padded = (batch_descriptor.num_reqs if
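
In effect, the fix moves the graph dispatch (cudagraph_dispatcher.dispatch) ahead of the data-parallel sync, so the padded token count carried by the batch descriptor is what gets synchronized, and re-dispatches only when the sync changes that count. Below is a minimal, runnable sketch of that control flow; BatchDescriptor, Dispatcher, and sync_metadata_across_dp are simplified stand-ins invented for illustration, not the actual vllm_ascend classes or helpers.

from dataclasses import dataclass


@dataclass
class BatchDescriptor:
    # Simplified stand-in: only the fields needed to show the padding flow.
    num_tokens: int
    uniform_decode: bool


class Dispatcher:
    # Stand-in dispatcher: pads num_tokens up to the nearest captured
    # graph size, mimicking cudagraph_dispatcher.dispatch().
    def __init__(self, capture_sizes):
        self.capture_sizes = sorted(capture_sizes)

    def dispatch(self, num_tokens, uniform_decode, has_lora):
        padded = next((s for s in self.capture_sizes if s >= num_tokens),
                      num_tokens)
        return "FULL", BatchDescriptor(padded, uniform_decode)


def sync_metadata_across_dp(num_tokens, with_prefill, other_dp_num_tokens):
    # Stand-in for _sync_metadata_across_dp: every rank adopts the max
    # token count seen across the data-parallel group.
    num_tokens_across_dp = other_dp_num_tokens + [num_tokens]
    return max(num_tokens_across_dp), num_tokens_across_dp, with_prefill


def dummy_run_padding(num_tokens, uniform_decode, with_prefill,
                      dispatcher, other_dp_num_tokens, has_lora=False):
    # 1) Dispatch first, so the batch descriptor carries the locally
    #    graph-padded token count.
    _ag_mode, batch_descriptor = dispatcher.dispatch(
        num_tokens=num_tokens,
        uniform_decode=uniform_decode,
        has_lora=has_lora)

    # 2) Sync the *padded* count across DP ranks (the pre-fix code synced
    #    the raw count before padding was applied).
    num_tokens, num_tokens_across_dp, with_prefill = sync_metadata_across_dp(
        batch_descriptor.num_tokens, with_prefill, other_dp_num_tokens)

    # 3) Re-dispatch only if the DP sync enlarged the local count.
    if num_tokens != batch_descriptor.num_tokens:
        _ag_mode, batch_descriptor = dispatcher.dispatch(
            num_tokens=num_tokens,
            uniform_decode=uniform_decode,
            has_lora=has_lora)

    return batch_descriptor, num_tokens_across_dp, with_prefill


if __name__ == "__main__":
    dispatcher = Dispatcher(capture_sizes=[8, 16, 32, 64])
    desc, _, _ = dummy_run_padding(
        num_tokens=10, uniform_decode=True, with_prefill=False,
        dispatcher=dispatcher, other_dp_num_tokens=[32])
    print(desc.num_tokens)  # 32: padded to the DP max, then re-dispatched

The guard in step 3 mirrors the diff's `if num_tokens != batch_descriptor.num_tokens` check: it avoids a redundant dispatch on the common path where the locally padded count already matches the DP-wide count.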
