Skip to content

Commit 0763268

Browse files
authored
[NPU] Qwen2 groupwise performance optimization (#12299)
* Qwen2 groupwise (gw) performance optimization
* Remove debug
1 parent 41b8064 commit 0763268

File tree

1 file changed

+4
-1
lines changed
  • python/llm/src/ipex_llm/transformers/npu_models

1 file changed

+4
-1
lines changed

python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,10 @@ def __init__(
         new_value_states = self.convert_to_fp16(curr_key_values[i][1])
 
         print(f"{mode} start compiling")
-        self.compile()
+        if group_size != 0 and (mode == "prefill" or num_layers == 2):
+            self.compile(npu_dpu_groups=6)
+        else:
+            self.compile()
         print(f"{mode} end compiling")
 
     def build_decoder(

0 commit comments

Comments
 (0)