forked from stanfordnlp/pyvene
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodelings_intervenable_whisper.py
More file actions
54 lines (50 loc) · 2.65 KB
/
modelings_intervenable_whisper.py
File metadata and controls
54 lines (50 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""
Each modeling file in this library maps the abstract names of
intervention anchor points to the actual model modules defined
in the Hugging Face library.
We also want to let the intervention library know how to
configure the dimensions of an intervention based on the model
config defined in the Hugging Face library.
"""
import torch
from ..constants import *
# Anchor-point name -> (module path template, hook constant[, head-split spec]).
# Every path template addresses a module under ``encoder.layers[%s]``; the
# "%s" placeholder is filled in elsewhere (presumably with the layer index --
# confirm against the code that consumes this table).
# The optional third field pairs ``split_head_and_permute`` with "n_head",
# which is also a key of ``whisper_type_to_dimension_mapping``.
_WHISPER_HEAD_SPLIT = (split_head_and_permute, "n_head")

whisper_type_to_module_mapping = {
    # Whole encoder layer.
    "block_input": ("encoder.layers[%s]", CONST_INPUT_HOOK),
    "block_output": ("encoder.layers[%s]", CONST_OUTPUT_HOOK),
    # Feed-forward modules (activation_fn, fc2, fc1).
    "mlp_activation": (
        "encoder.layers[%s].activation_fn",
        CONST_OUTPUT_HOOK,
    ),
    "mlp_output": ("encoder.layers[%s].fc2", CONST_OUTPUT_HOOK),
    "mlp_input": ("encoder.layers[%s].fc1", CONST_INPUT_HOOK),
    # Input side of the attention output projection.
    "attention_value_output": (
        "encoder.layers[%s].self_attn.out_proj",
        CONST_INPUT_HOOK,
    ),
    "head_attention_value_output": (
        "encoder.layers[%s].self_attn.out_proj",
        CONST_INPUT_HOOK,
        _WHISPER_HEAD_SPLIT,
    ),
    # Self-attention module as a whole.
    "attention_output": ("encoder.layers[%s].self_attn", CONST_OUTPUT_HOOK),
    "attention_input": ("encoder.layers[%s].self_attn", CONST_INPUT_HOOK),
    # Outputs of the query / key / value projections.
    "query_output": (
        "encoder.layers[%s].self_attn.q_proj",
        CONST_OUTPUT_HOOK,
    ),
    "key_output": (
        "encoder.layers[%s].self_attn.k_proj",
        CONST_OUTPUT_HOOK,
    ),
    "value_output": (
        "encoder.layers[%s].self_attn.v_proj",
        CONST_OUTPUT_HOOK,
    ),
    # Same projections, with the head-split spec attached.
    "head_query_output": (
        "encoder.layers[%s].self_attn.q_proj",
        CONST_OUTPUT_HOOK,
        _WHISPER_HEAD_SPLIT,
    ),
    "head_key_output": (
        "encoder.layers[%s].self_attn.k_proj",
        CONST_OUTPUT_HOOK,
        _WHISPER_HEAD_SPLIT,
    ),
    "head_value_output": (
        "encoder.layers[%s].self_attn.v_proj",
        CONST_OUTPUT_HOOK,
        _WHISPER_HEAD_SPLIT,
    ),
}
# Anchor-point name -> 1-tuple holding the name of the model-config field
# (or a "a/b" expression over two fields) that sizes that anchor point.
# NOTE(review): the "a/b" strings are presumably evaluated as a division by
# the consumer of this table -- confirm against the caller.
_WHISPER_MODEL_DIM = ("d_model",)
_WHISPER_PER_HEAD_DIM = ("d_model/encoder_attention_heads",)

whisper_type_to_dimension_mapping = {
    "n_head": ("encoder_attention_heads",),
    # Encoder layer and feed-forward anchors.
    "block_input": _WHISPER_MODEL_DIM,
    "block_output": _WHISPER_MODEL_DIM,
    "mlp_activation": ("encoder_ffn_dim",),
    "mlp_output": _WHISPER_MODEL_DIM,
    "mlp_input": _WHISPER_MODEL_DIM,
    # Attention anchors.
    "attention_value_output": _WHISPER_MODEL_DIM,
    "head_attention_value_output": _WHISPER_PER_HEAD_DIM,
    "attention_output": _WHISPER_MODEL_DIM,
    "attention_input": _WHISPER_MODEL_DIM,
    # Query / key / value projections.
    "query_output": _WHISPER_MODEL_DIM,
    "key_output": _WHISPER_MODEL_DIM,
    "value_output": _WHISPER_MODEL_DIM,
    "head_query_output": _WHISPER_PER_HEAD_DIM,
    "head_key_output": _WHISPER_PER_HEAD_DIM,
    "head_value_output": _WHISPER_PER_HEAD_DIM,
}
"""whisper model with LM head"""
# Module mapping for the LM-head variant: each module path template from the
# base mapping gets a "model." prefix, and the remaining tuple fields (hook
# constant and optional head-split spec) are carried over unchanged.
# NOTE(review): the prefix presumably reflects the LM-head class holding the
# base model under a ``model`` attribute -- confirm against the model class.
# A dict comprehension is used so that no loop variables are left behind in
# the module namespace (where ``from <module> import *`` would re-export them).
whisper_lm_type_to_module_mapping = {
    anchor_name: (f"model.{module_spec[0]}",) + module_spec[1:]
    for anchor_name, module_spec in whisper_type_to_module_mapping.items()
}
# Dimensions are identical for the LM-head variant; this is an alias of the
# base dict (same object, not a copy).
whisper_lm_type_to_dimension_mapping = whisper_type_to_dimension_mapping