Skip to content

Commit e11d833

Browse files
committed
revert
1 parent c6147a3 commit e11d833

File tree

2 files changed: +7 additions, −4 deletions

vllm/model_executor/models/mllama.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,10 +1029,12 @@ def forward(
10291029
# to 2D tensor to align with public vllm input_tokens shape. But this
10301030
# will face the graph building failure issue, still need to investigate.
10311031
assert len(residual.shape) == 3
1032-
if len(hidden_states.shape)==2:
1033-
hidden_states = hidden_states.view(residual.size(0), residual.size(1), residual.size(2))
1032+
if len(hidden_states.shape) == 2:
1033+
hidden_states = hidden_states.view(residual.size(0),
1034+
residual.size(1),
1035+
residual.size(2))
10341036
full_text_row_masked_out_mask = full_text_row_masked_out_mask.view(
1035-
hidden_states.size(0), -1, 1)
1037+
hidden_states.size(0), -1, 1)
10361038
hidden_states = full_text_row_masked_out_mask * hidden_states
10371039
hidden_states = residual + self.cross_attn_attn_gate.tanh(
10381040
) * hidden_states

vllm/worker/hpu_enc_dec_model_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import habana_frameworks.torch as htorch
1111
import torch
12+
from PIL import Image
1213
from vllm_hpu_extension.ops import batch2block, block2batch
1314

1415
from vllm.attention import AttentionMetadata
@@ -21,7 +22,7 @@
2122
from vllm.sequence import (CompletionSequenceGroupOutput, IntermediateTensors,
2223
Logprob, SequenceData, SequenceGroupMetadata,
2324
SequenceOutput)
24-
from vllm.utils import is_fake_hpu
25+
from vllm.utils import is_fake_hpu, is_list_of
2526
from vllm.worker.hpu_model_runner import (HpuModelAdapter, HPUModelRunnerBase,
2627
ModelInputForHPUWithSamplingMetadata,
2728
setup_profiler, subtuple)

0 commit comments

Comments
 (0)