Skip to content

Commit 7b40f9b

Browse files
authored
[NPU] Support GW for NPU C++ (#12450)
1 parent c2efa26 commit 7b40f9b

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
445 445   "qkv_bias": True,
446 446   "use_prefill_sdp": False,
447 447   "weight_num": 7,
448     - "weight_idx": 8}
    448 + "weight_idx": 8,
    449 + "n_splits_linear": n_splits_linear,
    450 + "n_splits_down_proj": n_splits_down_proj}
449 451   model.config.update(update_dict)
450 452   model.config.save_pretrained(save_directory)
451 453

@@ -495,7 +497,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
495 497   "weight_num": 7,
496 498   "weight_idx": 5,
497 499   "embedding_post": embedding_post,
498     - "cos_sin_input": cos_sin_input}
    500 + "cos_sin_input": cos_sin_input,
    501 + "n_splits_linear": n_splits_linear,
    502 + "n_splits_down_proj": n_splits_down_proj}
499 503   model.config.update(update_dict)
500 504   model.config.save_pretrained(save_directory)
501 505

@@ -530,7 +534,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
530 534   "weight_num": 7,
531 535   "weight_idx": 5,
532 536   "model_type": "minicpm",
533     - "embedding_post": True}
    537 + "embedding_post": True,
    538 + "n_splits_linear": n_splits_linear,
    539 + "n_splits_down_proj": n_splits_down_proj}
534 540   model.config.update(update_dict)
535 541   model.config.save_pretrained(save_directory)
536 542

0 commit comments

Comments (0)