Skip to content

Commit e11d833

Browse files
committed
revert
1 parent c6147a3 commit e11d833

File tree

2 files changed: +7 additions, −4 deletions

vllm/model_executor/models/mllama.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,10 +1029,12 @@ def forward(
10291029
# to 2D tensor to align with public vllm input_tokens shape. But this
10301030
# will face the graph building failure issue, still need to investigate.
10311031
assert len(residual.shape) == 3
1032-
if len(hidden_states.shape)==2:
1033-
hidden_states = hidden_states.view(residual.size(0), residual.size(1), residual.size(2))
1032+
if len(hidden_states.shape) == 2:
1033+
hidden_states = hidden_states.view(residual.size(0),
1034+
residual.size(1),
1035+
residual.size(2))
10341036
full_text_row_masked_out_mask = full_text_row_masked_out_mask.view(
1035-
hidden_states.size(0), -1, 1)
1037+
hidden_states.size(0), -1, 1)
10361038
hidden_states = full_text_row_masked_out_mask * hidden_states
10371039
hidden_states = residual + self.cross_attn_attn_gate.tanh(
10381040
) * hidden_states

vllm/worker/hpu_enc_dec_model_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import habana_frameworks.torch as htorch
1111
import torch
12+
from PIL import Image
1213
from vllm_hpu_extension.ops import batch2block, block2batch
1314

1415
from vllm.attention import AttentionMetadata
@@ -21,7 +22,7 @@
2122
from vllm.sequence import (CompletionSequenceGroupOutput, IntermediateTensors,
2223
Logprob, SequenceData, SequenceGroupMetadata,
2324
SequenceOutput)
24-
from vllm.utils import is_fake_hpu
25+
from vllm.utils import is_fake_hpu, is_list_of
2526
from vllm.worker.hpu_model_runner import (HpuModelAdapter, HPUModelRunnerBase,
2627
ModelInputForHPUWithSamplingMetadata,
2728
setup_profiler, subtuple)

0 commit comments

Comments
 (0)