From 1ff1f6ee9326695f67699773ef5cf1ed006c0f9a Mon Sep 17 00:00:00 2001
From: vasiliy
Date: Mon, 10 Feb 2025 20:37:45 -0800
Subject: [PATCH] Update

[ghstack-poisoned]
---
 test/dtypes/test_affine_quantized.py | 1 +
 torchao/quantization/quant_api.py    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py
index d26f1d8e04..616701f1e3 100644
--- a/test/dtypes/test_affine_quantized.py
+++ b/test/dtypes/test_affine_quantized.py
@@ -218,6 +218,7 @@ def test_flatten_unflatten(self, device, dtype):
             linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
             if isinstance(apply_quant, AOBaseConfig):
                 quantize_(linear, apply_quant)
+                ql = linear
             else:
                 # TODO(#1690): delete this once config migration is done
                 ql = apply_quant(linear)
diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py
index 6c4ac40dc7..7aec7a3a7c 100644
--- a/torchao/quantization/quant_api.py
+++ b/torchao/quantization/quant_api.py
@@ -865,11 +865,11 @@ def _int4_weight_only_transform(
     return module
 
 
+@dataclass
 class Int8WeightOnlyConfig(AOBaseConfig):
     """
     Configuration for applying int8 weight-only symmetric per-channel quantization to linear layers.
     """
-
     group_size: Optional[int] = None
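
Note on usage (not part of the patch): quantize_ mutates the passed module in
place and returns None, which is why the test now aliases `ql = linear` after
the config-based path. A minimal sketch of the API this patch exercises,
assuming torchao's public quantize_ entry point and the default
Int8WeightOnlyConfig arguments:

    import torch
    from torchao.quantization import quantize_
    from torchao.quantization.quant_api import Int8WeightOnlyConfig

    # Plain linear layer; the dtype choice here is illustrative.
    linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)

    # group_size defaults to None, i.e. per-channel scales.
    config = Int8WeightOnlyConfig()

    # quantize_ swaps the weight for an int8 affine-quantized tensor in
    # place; the quantized module is the original `linear` object.
    quantize_(linear, config)
    ql = linear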