From 88406805b2c27d8b8c1a89a06abb806002ea815f Mon Sep 17 00:00:00 2001 From: gushiqiao <77222802+gushiqiao@users.noreply.github.com> Date: Tue, 14 Jan 2025 18:38:59 +0800 Subject: [PATCH] Update base_blockwise_quantization.py --- llmc/compression/quantization/base_blockwise_quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmc/compression/quantization/base_blockwise_quantization.py b/llmc/compression/quantization/base_blockwise_quantization.py index 514b5434..9cedd214 100644 --- a/llmc/compression/quantization/base_blockwise_quantization.py +++ b/llmc/compression/quantization/base_blockwise_quantization.py @@ -840,7 +840,7 @@ def scaling_input(self, x, scales, is_gqa): batch_scale = scales_tmp.view(1, -1) x_tmp[i] = batch / batch_scale else: - x_tmp = x / scales.view(1, -1) + x_tmp = x / scales_tmp.view(1, -1) return x_tmp @torch.no_grad()