Commit f0801c5

Add Qwen3 models.

1 parent 3b5ce13
3 files changed: +82 -0 lines changed

pyvene/models/intervenable_modelcard.py (5 additions, 0 deletions)
@@ -14,6 +14,7 @@
 from .llava.modelings_intervenable_llava import *
 from .olmo.modelings_intervenable_olmo import *
 from .olmo2.modelings_intervenable_olmo2 import *
+from .qwen3.modelings_intervenable_qwen3 import *
 
 #########################################################################
 """
@@ -66,6 +67,8 @@
     hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_module_mapping,
     hf_models.olmo2.modeling_olmo2.Olmo2Model: olmo2_type_to_module_mapping,
     hf_models.olmo2.modeling_olmo2.Olmo2ForCausalLM: olmo2_lm_type_to_module_mapping,
+    hf_models.qwen3.modeling_qwen3.Qwen3Model: qwen3_type_to_module_mapping,
+    hf_models.qwen3.modeling_qwen3.Qwen3ForCausalLM: qwen3_lm_type_to_module_mapping,
     hf_models.blip.modeling_blip.BlipForQuestionAnswering: blip_type_to_module_mapping,
     hf_models.blip.modeling_blip.BlipForImageTextRetrieval: blip_itm_type_to_module_mapping,
     MLPModel: mlp_type_to_module_mapping,
@@ -103,6 +106,8 @@
     hf_models.olmo.modeling_olmo.OlmoForCausalLM: olmo_lm_type_to_dimension_mapping,
     hf_models.olmo2.modeling_olmo2.Olmo2Model: olmo2_type_to_dimension_mapping,
     hf_models.olmo2.modeling_olmo2.Olmo2ForCausalLM: olmo2_lm_type_to_dimension_mapping,
+    hf_models.qwen3.modeling_qwen3.Qwen3Model: qwen3_type_to_dimension_mapping,
+    hf_models.qwen3.modeling_qwen3.Qwen3ForCausalLM: qwen3_lm_type_to_dimension_mapping,
     hf_models.blip.modeling_blip.BlipForQuestionAnswering: blip_type_to_dimension_mapping,
     hf_models.blip.modeling_blip.BlipForImageTextRetrieval: blip_itm_type_to_dimension_mapping,
     MLPModel: mlp_type_to_dimension_mapping,
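Both registries are keyed on the concrete huggingface model class, so the library can pick the right mapping for whatever model it is handed. A minimal sketch of that kind of class-keyed lookup (illustrative only; the helper below is not part of this commit or of pyvene's API):

def lookup_mapping(model, registry):
    # `registry` is one of the class-keyed dicts above, e.g. mapping
    # Qwen3ForCausalLM -> qwen3_lm_type_to_module_mapping.
    return registry[type(model)]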

pyvene/models/qwen3/__init__.py

Whitespace-only changes (a new, empty package marker).

pyvene/models/qwen3/modelings_intervenable_qwen3.py (77 additions, 0 deletions)
@@ -0,0 +1,77 @@
"""
Each modeling file in this library is a mapping between the
abstract naming of intervention anchor points and the actual
model modules defined in the huggingface library.
We also want to let the intervention library know how to
configure the dimensions of interventions based on the model
config defined in the huggingface library.
"""
import torch
from ..constants import *
qwen3_type_to_module_mapping = {
    "block_input": ("layers[%s]", CONST_INPUT_HOOK),
    "block_output": ("layers[%s]", CONST_OUTPUT_HOOK),
    "mlp_activation": ("layers[%s].mlp.act_fn", CONST_OUTPUT_HOOK),
    "mlp_output": ("layers[%s].mlp", CONST_OUTPUT_HOOK),
    "mlp_input": ("layers[%s].mlp", CONST_INPUT_HOOK),
    "attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK),
    "head_attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK, (split_head_and_permute, "n_head")),
    "attention_output": ("layers[%s].self_attn", CONST_OUTPUT_HOOK),
    "attention_input": ("layers[%s].self_attn", CONST_INPUT_HOOK),
    "query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK),
    "key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK),
    "value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK),
    "head_query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_head")),
    "head_key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_kv_head")),
    "head_value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK, (split_head_and_permute, "n_kv_head")),
}
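Each value is a (path_template, hook_type[, (split_fn, head_count_key)]) tuple, and "layers[%s]" is filled in with the target layer index. As a rough sketch of how such a path can be resolved against a loaded Qwen3Model (illustrative; pyvene ships its own resolution logic, and resolve_module here is hypothetical):

import re

def resolve_module(model, path_template, layer):
    # "layers[%s].mlp.act_fn" with layer=3 becomes "layers[3].mlp.act_fn".
    path = path_template % layer
    module = model
    for part in path.split("."):
        indexed = re.fullmatch(r"(\w+)\[(\d+)\]", part)
        if indexed:  # indexed access such as "layers[3]"
            module = getattr(module, indexed.group(1))[int(indexed.group(2))]
        else:        # plain attribute access such as "mlp"
            module = getattr(module, part)
    return module

For example, resolve_module(qwen3_model, "layers[%s].mlp", 3) returns the MLP of decoder block 3, the module that "mlp_output" hooks.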
qwen3_type_to_dimension_mapping = {
    "n_head": ("num_attention_heads",),
    "n_kv_head": ("num_key_value_heads",),
    "block_input": ("hidden_size",),
    "block_output": ("hidden_size",),
    "mlp_activation": ("intermediate_size",),
    "mlp_output": ("hidden_size",),
    "mlp_input": ("hidden_size",),
    "attention_value_output": ("hidden_size",),
    "head_attention_value_output": ("hidden_size/num_attention_heads",),
    "attention_output": ("hidden_size",),
    "attention_input": ("hidden_size",),
    "query_output": ("hidden_size",),
    "key_output": ("hidden_size",),
    "value_output": ("hidden_size",),
    "head_query_output": ("hidden_size/num_attention_heads",),
    "head_key_output": ("hidden_size/num_attention_heads",),
    "head_value_output": ("hidden_size/num_attention_heads",),
}
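The dimension entries name attributes of the huggingface Qwen3 config, with "hidden_size/num_attention_heads" denoting the per-head width after split_head_and_permute. A quick way to check these against a real config (any Qwen3 checkpoint works; "Qwen/Qwen3-8B" is just the default used below):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("Qwen/Qwen3-8B")
# Full-width streams such as "block_output" are config.hidden_size wide.
# Per-head streams are hidden_size / num_attention_heads wide:
head_dim = config.hidden_size // config.num_attention_heads
# "head_key_output"/"head_value_output" split into num_key_value_heads heads,
# since Qwen3 uses grouped-query attention:
n_kv_head = config.num_key_value_heads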
"""qwen3 model with LM head"""
51+
qwen3_lm_type_to_module_mapping = {}
52+
for k, v in qwen3_type_to_module_mapping.items():
53+
qwen3_lm_type_to_module_mapping[k] = (f"model.{v[0]}", ) + v[1:]
54+
qwen3_lm_type_to_dimension_mapping = qwen3_type_to_dimension_mapping
55+
56+
"""qwen3 model with classifier head"""
57+
qwen3_classifier_type_to_module_mapping = {}
58+
for k, v in qwen3_type_to_module_mapping.items():
59+
qwen3_classifier_type_to_module_mapping[k] = (f"model.{v[0]}", ) + v[1:]
60+
qwen3_classifier_type_to_dimension_mapping = qwen3_type_to_dimension_mapping
61+
62+
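Since Qwen3ForCausalLM (and the classifier variant) wrap the decoder as their .model attribute, the derived mappings simply prefix every path with "model.":

# The same anchor point, base model vs. LM-head model:
qwen3_type_to_module_mapping["block_output"]     # ("layers[%s]", CONST_OUTPUT_HOOK)
qwen3_lm_type_to_module_mapping["block_output"]  # ("model.layers[%s]", CONST_OUTPUT_HOOK)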
def create_qwen3(
    name="Qwen/Qwen3-8B", cache_dir=None, dtype=torch.bfloat16
):
    """Creates a Qwen3 causal LM model, config, and tokenizer from the given name"""
    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

    config = AutoConfig.from_pretrained(name, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(
        name,
        config=config,
        cache_dir=cache_dir,
        torch_dtype=dtype,
    )
    print("loaded model")
    return config, tokenizer, model
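A hedged usage sketch tying the pieces together with a plain PyTorch forward hook (the checkpoint name and layer index are arbitrary choices, not part of the commit; pyvene normally attaches hooks through the mapping above rather than by hand):

config, tokenizer, model = create_qwen3(name="Qwen/Qwen3-0.6B")

captured = {}
def grab_mlp_output(module, inputs, output):
    # CONST_OUTPUT_HOOK on "mlp_output" observes exactly this tensor.
    captured["mlp_output"] = output

# "model.layers[%s].mlp" with layer index 3:
handle = model.model.layers[3].mlp.register_forward_hook(grab_mlp_output)
model(**tokenizer("Hello, Qwen3!", return_tensors="pt"))
handle.remove()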
