Commit dff29c0

Fix use_hqq for int4_weight_only quantize (#1707)
Fix HQQ call for int4_weight_only quantize
1 parent d3306b2 · commit dff29c0

File tree

1 file changed: +2 additions, -3 deletions

torchao/_models/llama/generate.py

Lines changed: 2 additions & 3 deletions
@@ -420,10 +420,9 @@ def ffn_or_attn_only(mod, fqn):
             else:
                 quantize_(model, int8_dynamic_activation_int8_weight())
         if "int4wo" in quantization:
+            use_hqq = False
             if "hqq" in quantization:
                 use_hqq = True
-            else:
-                use_hqq = False
             group_size = int(quantization.split("-")[1])
             assert (
                 group_size
@@ -434,7 +433,7 @@ def ffn_or_attn_only(mod, fqn):
                     256,
                 ]
             ), f"int4wo group_size needs to be one of [32,64,128,256] but got {group_size}"
-            quantize_(model, int4_weight_only(group_size=group_size))
+            quantize_(model, int4_weight_only(group_size=group_size, use_hqq=use_hqq))
         elif "int8adq-int4w-symm" in quantization:
             from torchao.dtypes import CutlassInt4PackedLayout
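
For context, here is a minimal runnable sketch of the code path this commit repairs. Before the fix, use_hqq was computed but never forwarded to int4_weight_only, so a flag like "int4wo-128-hqq" silently fell back to plain int4 weight-only quantization. The flag value and the toy model below are illustrative assumptions, not taken from the commit; quantize_ and int4_weight_only are the torchao APIs the diff itself uses, and int4 weight-only packing generally expects a bfloat16 model on CUDA.

import torch
from torchao.quantization import int4_weight_only, quantize_

# Hypothetical flag value following the "<method>-<group_size>[-hqq]"
# convention parsed in generate.py; not taken from the commit itself.
quantization = "int4wo-128-hqq"

# Toy model standing in for the Llama model; int4 tinygemm packing
# generally expects bfloat16 weights on a CUDA device.
model = torch.nn.Sequential(torch.nn.Linear(256, 256)).to(
    device="cuda", dtype=torch.bfloat16
)

if "int4wo" in quantization:
    use_hqq = False
    if "hqq" in quantization:
        use_hqq = True
    group_size = int(quantization.split("-")[1])
    # The one-line fix: forward use_hqq so HQQ-derived scales and
    # zero-points are actually applied instead of being silently dropped.
    quantize_(model, int4_weight_only(group_size=group_size, use_hqq=use_hqq))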
