Commit 4a1c1ea

Update

[ghstack-poisoned]
danielvegamyhre committed Jan 22, 2025
1 parent a1becad; commit 4a1c1ea
Showing 1 changed file with 5 additions and 6 deletions.

torchao/quantization/quant_api.py: 11 changes (5 additions, 6 deletions)
@@ -35,12 +35,12 @@
     PlainLayout,
     SemiSparseLayout,
     TensorCoreTiledLayout,
+    UintxLayout,
     to_affine_quantized_float8,
     to_affine_quantized_floatx,
     to_affine_quantized_floatx_static,
     to_affine_quantized_intx,
     to_marlinqqq_quantized_intx,
-    UintxLayout,
 )
 from torchao.float8.float8_linear import Float8Linear
 from torchao.float8.inference import Float8MMConfig
@@ -52,15 +52,15 @@
     to_weight_tensor_with_linear_activation_quantization_metadata,
 )
 from torchao.utils import (
-    is_MI300,
-    is_sm_at_least_89,
-    is_sm_at_least_90,
     TORCH_VERSION_AT_LEAST_2_4,
     TORCH_VERSION_AT_LEAST_2_5,
     TORCH_VERSION_AT_LEAST_2_6,
+    is_MI300,
+    is_sm_at_least_89,
+    is_sm_at_least_90,
 )

-from .autoquant import autoquant, AutoQuantizableLinearWeight
+from .autoquant import AutoQuantizableLinearWeight, autoquant
 from .GPTQ import (
     Int4WeightOnlyGPTQQuantizer,
     Int4WeightOnlyQuantizer,
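For context: every moved import lands where Python's default case-sensitive string comparison places it (uppercase ASCII letters sort before lowercase), the order a case-sensitive import sorter produces, while the old positions correspond to a case-insensitive sort. A minimal sketch of the two orderings, using names taken from this diff:

names = [
    "is_MI300",
    "TORCH_VERSION_AT_LEAST_2_4",
    "autoquant",
    "AutoQuantizableLinearWeight",
]
print(sorted(names))  # case-sensitive: uppercase first, matches the new order
# ['AutoQuantizableLinearWeight', 'TORCH_VERSION_AT_LEAST_2_4', 'autoquant', 'is_MI300']
print(sorted(names, key=str.lower))  # case-insensitive, matches the old order
# ['autoquant', 'AutoQuantizableLinearWeight', 'is_MI300', 'TORCH_VERSION_AT_LEAST_2_4']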
@@ -929,7 +929,6 @@ def float8_weight_only(weight_dtype: torch.dtype = torch.float8_e4m3fn):
     The actual matmul will be computed in original precision of the weight tensor.
     """
     from torchao.dtypes import to_affine_quantized_floatx
-
     def apply_float8wo_quant(weight):
         block_size = (1, weight.shape[1])
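For context, float8_weight_only is one of the configuration helpers passed to torchao's quantize_ API. A minimal usage sketch, assuming a recent torchao build and a CUDA GPU with float8 support (the model and sizes here are illustrative only): each linear weight is converted to float8_e4m3fn with one scale per output row (block_size = (1, weight.shape[1])), while the matmul itself runs in the weight's original precision, as the docstring says.

import torch
from torchao.quantization import float8_weight_only, quantize_

# Toy model; weights are quantized in place to float8_e4m3fn (the default weight_dtype).
model = torch.nn.Sequential(torch.nn.Linear(128, 256)).cuda().to(torch.bfloat16)
quantize_(model, float8_weight_only())
out = model(torch.randn(16, 128, device="cuda", dtype=torch.bfloat16))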
