Skip to content

Commit

Permalink
Merge pull request #298 from ModelTC/dev_fix
Browse files: browse the repository at this point in the history
Fix vllm fp8 export bug
  • Loading branch information
gushiqiao authored Jan 15, 2025
2 parents 9943e79 + 774d06e commit 2d91eda
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions llmc/utils/export_vllm.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,10 +8,15 @@ def update_vllm_quant_config(
vllm_quant_method='compressed-tensors',

):

need_pack = config.quant.weight.get('need_pack', False)
if config.quant.get('quant_type', 'int-quant') == 'float-quant':
if 'act' in config.quant and config.quant.act.static:
weight_quant_type = config.quant.act.get('quant_type', 'int-quant')
if 'act' in config.quant:
act_quant_type = config.quant.act.get('quant_type', 'int-quant')
assert act_quant_type == weight_quant_type
else:
act_quant_type = None
if act_quant_type is not None and act_quant_type == 'float-quant':
if config.quant.act.get('static', False):
quant_config = {
'activation_scheme': 'static',
'ignored_layers': [
Expand All @@ -30,8 +35,7 @@ def update_vllm_quant_config(
vllm_quant_format = 'float-quantized'
quant_type = 'float'
w_num_bits = 8
if 'act' in config.quant:
a_num_bits = 8
a_num_bits = 8
elif need_pack:
vllm_quant_format = 'pack-quantized'
quant_type = 'int'
Expand Down

0 comments on commit 2d91eda

Please sign in to comment.