Commit f51a142

Fix:
- condition triton import in gemm
- linting
1 parent 337d264 commit f51a142

5 files changed: +10 -11 lines


benchmarks/benchmark_blockwise_scaled_linear_triton.py

+2 -4

@@ -12,12 +12,10 @@
     fp8_blockwise_act_quant,
     fp8_blockwise_weight_quant,
 )
-
 from torchao.quantization.quant_api import (
-    _int8_symm_per_token_reduced_range_quant_cutlass,
     _int4_symm_per_token_quant_cutlass,
+    _int8_symm_per_token_reduced_range_quant_cutlass,
 )
-
 from torchao.utils import is_sm_at_least_89


@@ -44,7 +42,7 @@ def get_blockwise_problem(
     assert dtype in [
         torch.float8_e4m3fn,
         torch.float8_e5m2,
-    ], f"dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
+    ], "dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
     dtype_max = torch.finfo(dtype).max
     A = (dtype_max * (2 * torch.rand(m, k, device=device) - 1)).to(dtype)
     A_scale = torch.randn((m, k // block_size), dtype=torch.half, device=device)
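
Two mechanical cleanups in this file, both under the "linting" bullet: the quant_api imports are reordered alphabetically (isort-style) with stray blank lines dropped, and the assertion message loses an f prefix that had no placeholders to interpolate; the latter is the pattern flake8/ruff report as F541. A minimal sketch of that rule:

import torch

dtype = torch.float8_e4m3fn

# F541: an f-string with no {placeholders} is just a string with a useless prefix.
# before: f"dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
# after:   "dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
assert dtype in [
    torch.float8_e4m3fn,
    torch.float8_e5m2,
], "dtype must be torch.float8_e4m3fn or torch.float8_e5m2"

# The f prefix is warranted only when the message interpolates a value:
assert dtype in [torch.float8_e4m3fn, torch.float8_e5m2], f"unsupported dtype: {dtype}"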

test/prototype/test_blockwise_triton.py

-1
@@ -7,7 +7,6 @@
     fp8_blockwise_weight_dequant,
     fp8_blockwise_weight_quant,
 )
-
 from torchao.utils import is_sm_at_least_89

 BLOCKWISE_SIZE_MNK = [

torchao/prototype/blockwise_fp8/__init__.py

+1 -1

@@ -2,8 +2,8 @@
 from .blockwise_linear import BlockwiseQuantLinear
 from .blockwise_quantization import (
     fp8_blockwise_act_quant,
-    fp8_blockwise_weight_quant,
     fp8_blockwise_weight_dequant,
+    fp8_blockwise_weight_quant,
 )

 __all__ = [

torchao/prototype/blockwise_fp8/blockwise_fp8_gemm_triton.py

+5 -3

@@ -1,7 +1,9 @@
 import torch
-import triton
-import triton.language as tl
-from triton import Config
+
+if torch.cuda.is_available():
+    import triton
+    import triton.language as tl
+    from triton import Config

 # Original implementation at https://github.com/deepseek-ai/DeepSeek-V3/blob/main/inference/kernel.py
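
This guarded import is the commit's headline fix: triton is a GPU-only dependency that is often absent or unusable on CPU-only hosts, so an unconditional import could break importing the module at all. Gating it on torch.cuda.is_available() keeps the module importable everywhere. A minimal self-contained sketch of the pattern; the HAS_TRITON flag and cdiv fallback are illustrative additions, not part of this commit:

import torch

# Gate the GPU-only dependency so the module still imports on CPU-only hosts.
if torch.cuda.is_available():
    import triton
    import triton.language as tl
    from triton import Config

    HAS_TRITON = True  # illustrative flag, not in the commit
else:
    HAS_TRITON = False

def cdiv(a: int, b: int) -> int:
    # Pure-Python stand-in for triton.cdiv (ceiling division),
    # usable even when the guard above skipped the import.
    return -(a // -b)

if __name__ == "__main__":
    print("triton available:", HAS_TRITON)
    print(cdiv(1000, 128))  # 8 blocks of 128 cover 1000 elements

One caveat: names guarded this way only exist when the condition held, so the rest of the module must avoid referencing them at import time on CPU-only hosts. A try/except ImportError guard is a common alternative that also covers CUDA machines where triton simply isn't installed.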

torchao/prototype/blockwise_fp8/blockwise_quantization.py

+2 -2

@@ -55,7 +55,7 @@ def fp8_blockwise_act_quant(
     assert dtype in [
         torch.float8_e4m3fn,
         torch.float8_e5m2,
-    ], f"dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
+    ], "dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
     y = torch.empty_like(x, dtype=dtype)
     s = x.new_empty(*x.size()[:-1], x.size(-1) // block_size, dtype=torch.float32)
     grid = lambda meta: (triton.cdiv(x.numel(), meta["BLOCK_SIZE"]),)
@@ -117,7 +117,7 @@ def fp8_blockwise_weight_quant(
     assert dtype in [
         torch.float8_e4m3fn,
         torch.float8_e5m2,
-    ], f"dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
+    ], "dtype must be torch.float8_e4m3fn or torch.float8_e5m2"
     M, N = x.size()
     y = torch.empty_like(x, dtype=dtype)
     s = x.new_empty(M // block_size, N // block_size, dtype=torch.float32)
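
For orientation, a usage sketch of the two quantizers these hunks touch. The (tensor, scale) return pairs are inferred from the y/s buffers visible in the diff, and block_size=128 is an assumption borrowed from the DeepSeek-V3 kernel credited in the gemm file, not something this commit states; running it needs a CUDA GPU with triton installed:

import torch
from torchao.prototype.blockwise_fp8 import (
    fp8_blockwise_act_quant,
    fp8_blockwise_weight_quant,
)

x = torch.randn(512, 2048, device="cuda")   # activations
w = torch.randn(2048, 2048, device="cuda")  # weight matrix

# Assumed signatures/returns: (quantized fp8 tensor, float32 scales).
xq, xs = fp8_blockwise_act_quant(x, block_size=128, dtype=torch.float8_e4m3fn)
wq, ws = fp8_blockwise_weight_quant(w, block_size=128, dtype=torch.float8_e4m3fn)

# Scale shapes read off the new_empty calls in the diff:
# activations keep one scale per 128-wide block of the last dim,
# weights keep one scale per 128x128 tile.
assert xs.shape == (512, 2048 // 128)
assert ws.shape == (2048 // 128, 2048 // 128)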
