
Commit

update
helloyongyang committed Dec 15, 2024
1 parent 4c26182 commit f04d4ea
Showing 4 changed files with 148 additions and 122 deletions.
6 changes: 1 addition & 5 deletions llmc/compression/quantization/base_blockwise_quantization.py
@@ -463,11 +463,7 @@ def run(self, block, input_feat, handles):

def block_transform(self, block, input_feat, block_kwargs):
        logger.info(f'Start transforming the {self.block_idx}-th block')
subsets = (
self.model.get_subsets_in_block(block)
if self.model.get_modality() == 'language'
else self.model.get_vision_subsets_in_block(block)
)
subsets = self.model.get_subsets_in_block(block)

if self.act_static:
self.register_non_linear_qparams(block, input_feat)
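
With this change, block_transform always calls self.model.get_subsets_in_block(block); the language/vision dispatch moves into the model classes shown below. Each subset is a plain dict. The sketch below only illustrates how such dicts could be walked, using the keys visible in this diff ('layers', 'prev_op', 'input', 'inspect', 'has_kwargs', 'is_mlp', 'do_trans'); it is not llmc's actual transform logic.

def walk_subsets(subsets):
    """Illustrative only: iterate the subset dicts returned by get_subsets_in_block."""
    for subset in subsets:
        if not subset.get('do_trans', True):
            # e.g. the mlp.fc2 entries below set 'do_trans': False
            continue
        for name, layer in subset['layers'].items():
            prev_ops = [type(m).__name__ for m in subset['prev_op']]
            print(f"candidate {name}: layer={type(layer).__name__}, "
                  f"prev_op={prev_ops}, has_kwargs={subset['has_kwargs']}")
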
84 changes: 45 additions & 39 deletions llmc/models/internvl2.py
@@ -308,53 +308,59 @@ def batch_process(self, img_qas, calib_or_eval='eval', apply_chat_template=True,

def find_blocks(self):
if self.get_modality() == 'language':
self.blocks = self.model.model.layers
            super().find_blocks()
elif self.get_modality() == 'vision':
self.blocks = self.vision_model.encoder.layers
else:
            raise Exception(f'InternVL2 does not support {self.get_modality()} modality.')

def get_layernorms_in_block(self, block):
if self.get_modality() == 'language':
return {
'attention_norm': block.attention_norm,
'ffn_norm': block.ffn_norm,
}
            return super().get_layernorms_in_block(block)
elif self.get_modality() == 'vision':
return {
'norm1': block.norm1,
'norm2': block.norm2,
}
else:
            raise Exception(f'InternVL2 does not support {self.get_modality()} modality.')

def get_vision_subsets_in_block(self, block):
return [
{
'layers': {'attn.qkv': block.attn.qkv},
'prev_op': [block.norm1],
'input': ['attn.qkv'],
'inspect': block.attn,
'has_kwargs': False,
},
{
'layers': {'attn.proj': block.attn.proj},
'prev_op': [block.attn.qkv],
'input': ['attn.proj'],
'inspect': block.attn.proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
def get_subsets_in_block(self, block):
if self.get_modality() == 'language':
            return super().get_subsets_in_block(block)
elif self.get_modality() == 'vision':
return [
{
'layers': {'attn.qkv': block.attn.qkv},
'prev_op': [block.norm1],
'input': ['attn.qkv'],
'inspect': block.attn,
'has_kwargs': False,
},
{
'layers': {'attn.proj': block.attn.proj},
'prev_op': [block.attn.qkv],
'input': ['attn.proj'],
'inspect': block.attn.proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
else:
            raise Exception(f'InternVL2 does not support {self.get_modality()} modality.')
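
The llava.py and qwen2vl.py diffs below follow the same modality-dispatch pattern as internvl2.py above. Condensed into a hedged sketch; the class names and the stub base are illustrative, only the control flow comes from the diff:

class _LangBase:
    """Stub standing in for the language-model base class; illustrative only."""

    def find_blocks(self):
        self.blocks = ['<language decoder layers>']

    def get_subsets_in_block(self, block):
        return []  # language subsets would come from the real base class


class _ToyVLM(_LangBase):
    """Illustrative, not an llmc class: dispatches on modality like the models in this commit."""

    def __init__(self, modality, vision_layers):
        self.modality = modality
        self.vision_layers = vision_layers

    def get_modality(self):
        return self.modality

    def find_blocks(self):
        if self.get_modality() == 'language':
            super().find_blocks()             # reuse the language path
        elif self.get_modality() == 'vision':
            self.blocks = self.vision_layers  # model-specific vision blocks
        else:
            raise Exception(f'_ToyVLM does not support {self.get_modality()} modality.')

    def get_subsets_in_block(self, block):
        if self.get_modality() == 'language':
            return super().get_subsets_in_block(block)
        elif self.get_modality() == 'vision':
            return [{'layers': {}, 'prev_op': [], 'input': [],
                     'inspect': block, 'has_kwargs': False}]
        else:
            raise Exception(f'_ToyVLM does not support {self.get_modality()} modality.')
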
90 changes: 48 additions & 42 deletions llmc/models/llava.py
@@ -100,60 +100,66 @@ def batch_process(self, img_qas, calib_or_eval='eval', apply_chat_template=True,

def find_blocks(self):
if self.get_modality() == 'language':
self.blocks = self.model.model.layers
            super().find_blocks()
elif self.get_modality() == 'vision':
self.blocks = self.vision_model.vision_model.encoder.layers
else:
            raise Exception(f'Llava does not support {self.get_modality()} modality.')

def get_layernorms_in_block(self, block):
if self.get_modality() == 'language':
return {
'input_layernorm': block.input_layernorm,
'post_attention_layernorm': block.post_attention_layernorm,
}
            return super().get_layernorms_in_block(block)
elif self.get_modality() == 'vision':
return {
'layer_norm1': block.layer_norm1,
'layer_norm2': block.layer_norm2,
}
else:
            raise Exception(f'Llava does not support {self.get_modality()} modality.')

def get_vision_subsets_in_block(self, block):
return [
{
'layers': {
'self_attn.q_proj': block.self_attn.q_proj,
'self_attn.k_proj': block.self_attn.k_proj,
'self_attn.v_proj': block.self_attn.v_proj,
def get_subsets_in_block(self, block):
if self.get_modality() == 'language':
            return super().get_subsets_in_block(block)
elif self.get_modality() == 'vision':
return [
{
'layers': {
'self_attn.q_proj': block.self_attn.q_proj,
'self_attn.k_proj': block.self_attn.k_proj,
'self_attn.v_proj': block.self_attn.v_proj,
},
'prev_op': [block.layer_norm1],
'input': ['self_attn.q_proj'],
'inspect': block.self_attn,
'has_kwargs': True,
},
'prev_op': [block.layer_norm1],
'input': ['self_attn.q_proj'],
'inspect': block.self_attn,
'has_kwargs': True,
},
{
'layers': {'self_attn.out_proj': block.self_attn.out_proj},
'prev_op': [block.self_attn.v_proj],
'input': ['self_attn.out_proj'],
'inspect': block.self_attn.out_proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.layer_norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
{
'layers': {'self_attn.out_proj': block.self_attn.out_proj},
'prev_op': [block.self_attn.v_proj],
'input': ['self_attn.out_proj'],
'inspect': block.self_attn.out_proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.layer_norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
else:
            raise Exception(f'Llava does not support {self.get_modality()} modality.')


@MODEL_REGISTRY
90 changes: 54 additions & 36 deletions llmc/models/qwen2vl.py
@@ -124,46 +124,64 @@ def batch_process(self, img_qas, calib_or_eval='eval', apply_chat_template=True,

def find_blocks(self):
if self.get_modality() == 'language':
self.blocks = self.model.model.layers
            super().find_blocks()
elif self.get_modality() == 'vision':
self.blocks = self.vision_model.blocks
else:
            raise Exception(f'Qwen2VL does not support {self.get_modality()} modality.')

def get_vision_subsets_in_block(self, block):
return [
{
'layers': {
'attn.qkv': block.attn.qkv,
def get_layernorms_in_block(self, block):
if self.get_modality() == 'language':
            return super().get_layernorms_in_block(block)
elif self.get_modality() == 'vision':
return {
'norm1': block.norm1,
'norm2': block.norm2,
}
else:
            raise Exception(f'Qwen2VL does not support {self.get_modality()} modality.')

def get_subsets_in_block(self, block):
if self.get_modality() == 'language':
            return super().get_subsets_in_block(block)
elif self.get_modality() == 'vision':
return [
{
'layers': {
'attn.qkv': block.attn.qkv,
},
'prev_op': [block.norm1],
                'input': ['attn.qkv'],
'inspect': block.attn,
'has_kwargs': True,
},
'prev_op': [block.norm1],
            'input': ['attn.qkv'],
'inspect': block.attn,
'has_kwargs': True,
},
{
'layers': {'attn.proj': block.attn.proj},
'prev_op': [block.attn.qkv],
'input': ['attn.proj'],
'inspect': block.attn.proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
{
'layers': {'attn.proj': block.attn.proj},
'prev_op': [block.attn.qkv],
'input': ['attn.proj'],
'inspect': block.attn.proj,
'has_kwargs': False,
},
{
'layers': {'mlp.fc1': block.mlp.fc1},
'prev_op': [block.norm2],
'input': ['mlp.fc1'],
'inspect': block.mlp.fc1,
'has_kwargs': False,
'is_mlp': True,
},
{
'layers': {'mlp.fc2': block.mlp.fc2},
'prev_op': [block.mlp.fc1],
'input': ['mlp.fc2'],
'inspect': block.mlp.fc2,
'has_kwargs': False,
'is_mlp': True,
'do_trans': False
},
]
else:
            raise Exception(f'Qwen2VL does not support {self.get_modality()} modality.')

def get_catcher(self, first_block_input):
class Catcher(nn.Module):
