
Commit 303b104

Fix abnormal output for Qwen2-7B when sym_int8 (#12446)
1 parent: 71e1f11

File tree (1 file changed, +5 −1)

python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py

@@ -128,7 +128,11 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
         from ipex_llm.transformers.npu_models.common import split_linears
         if quantization_group_size == 0:
             n_splits_linear = 1
-            n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+            if qtype == "sym_int8_rtn":
+                # do not split mlp down_proj for Qwen2-7B & sym_int8
+                n_splits_down_proj = 1
+            else:
+                n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
         else:
             invalidInputError(
                 model.config.hidden_size % quantization_group_size == 0 and
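
For context on what the changed flag controls: splitting an mlp down_proj means dividing its single Linear (intermediate_size → hidden_size) along the input dimension into n_splits_down_proj smaller Linears whose partial outputs are summed. Below is a minimal plain-PyTorch sketch of that idea, using Qwen2-7B's shapes (intermediate_size 18944, hidden_size 3584); the helper name split_down_proj is hypothetical and this is not ipex_llm's actual split_linears implementation, which handles the NPU-specific details.

import torch
import torch.nn as nn


def split_down_proj(linear: nn.Linear, n_splits: int) -> nn.ModuleList:
    # Hypothetical illustration, not ipex_llm's split_linears.
    # Split a Linear along its input dimension into n_splits smaller Linears.
    in_features = linear.in_features
    assert in_features % n_splits == 0, "input dim must divide evenly"
    chunk = in_features // n_splits
    parts = nn.ModuleList()
    for i in range(n_splits):
        part = nn.Linear(chunk, linear.out_features, bias=False)
        # Copy the corresponding column slice of the original weight.
        part.weight.data = linear.weight.data[:, i * chunk:(i + 1) * chunk].clone()
        parts.append(part)
    return parts


# Qwen2-7B down_proj shapes: 18944 -> 3584.
down_proj = nn.Linear(18944, 3584, bias=False)
x = torch.randn(1, 18944)

# With n_splits_down_proj == 2, each half sees 9472 input features and the
# full output is the sum of the two partial outputs.
parts = split_down_proj(down_proj, 2)
chunk = 18944 // 2
y = sum(p(x[:, i * chunk:(i + 1) * chunk]) for i, p in enumerate(parts))
assert torch.allclose(y, down_proj(x), atol=1e-4)

Per the commit, keeping n_splits_down_proj at 1 under sym_int8_rtn (i.e., not splitting down_proj at all) avoids the abnormal output observed for Qwen2-7B with sym_int8.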
