diff --git a/llmc/models/qwen2moe.py b/llmc/models/qwen2moe.py index 6706ed62..340882de 100644 --- a/llmc/models/qwen2moe.py +++ b/llmc/models/qwen2moe.py @@ -85,6 +85,7 @@ def get_subsets_in_block(self, block): 'mlp.shared_expert.gate_proj': block.mlp.shared_expert.gate_proj, # noqa 'mlp.shared_expert.up_proj': block.mlp.shared_expert.up_proj, # noqa 'mlp.gate': block.mlp.gate, + 'mlp.shared_expert_gate': block.mlp.shared_expert_gate, }, 'prev_op': [block.post_attention_layernorm], 'input': ['mlp'],