From 4a1c1ea0879e8e6a054a219a4a23250b0cf6bdbd Mon Sep 17 00:00:00 2001
From: Daniel Vega-Myhre <danvm@meta.com>
Date: Wed, 22 Jan 2025 11:37:09 -0800
Subject: [PATCH] Update

[ghstack-poisoned]
---
 torchao/quantization/quant_api.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py
index 25588844e7..cfae8ee0ac 100644
--- a/torchao/quantization/quant_api.py
+++ b/torchao/quantization/quant_api.py
@@ -35,12 +35,12 @@
     PlainLayout,
     SemiSparseLayout,
     TensorCoreTiledLayout,
+    UintxLayout,
     to_affine_quantized_float8,
     to_affine_quantized_floatx,
     to_affine_quantized_floatx_static,
     to_affine_quantized_intx,
     to_marlinqqq_quantized_intx,
-    UintxLayout,
 )
 from torchao.float8.float8_linear import Float8Linear
 from torchao.float8.inference import Float8MMConfig
@@ -52,15 +52,15 @@
     to_weight_tensor_with_linear_activation_quantization_metadata,
 )
 from torchao.utils import (
-    is_MI300,
-    is_sm_at_least_89,
-    is_sm_at_least_90,
     TORCH_VERSION_AT_LEAST_2_4,
     TORCH_VERSION_AT_LEAST_2_5,
     TORCH_VERSION_AT_LEAST_2_6,
+    is_MI300,
+    is_sm_at_least_89,
+    is_sm_at_least_90,
 )
 
-from .autoquant import autoquant, AutoQuantizableLinearWeight
+from .autoquant import AutoQuantizableLinearWeight, autoquant
 from .GPTQ import (
     Int4WeightOnlyGPTQQuantizer,
     Int4WeightOnlyQuantizer,
@@ -929,7 +929,6 @@ def float8_weight_only(weight_dtype: torch.dtype = torch.float8_e4m3fn):
         The actual matmul will be computed in original precision of the weight tensor.
 
     """
-    from torchao.dtypes import to_affine_quantized_floatx
 
     def apply_float8wo_quant(weight):
         block_size = (1, weight.shape[1])