Commit 59bf666

use torch.float8_e8m0fnu in mx_formats (#1966)
1 parent 7e3978c commit 59bf666
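
For context on the dtype this commit adopts: torch.float8_e8m0fnu stores only an 8-bit biased exponent (no sign or mantissa bits), so a value decodes to 2**(bits - 127) and the all-ones bit pattern encodes NaN. A minimal standalone sketch, assuming a PyTorch build that ships this dtype (illustrative only, not code from the commit):

```python
import torch

# Raw biased exponents; 255 (all ones) would be the e8m0 NaN encoding.
bits = torch.tensor([126, 127, 130], dtype=torch.uint8)

# Reinterpret the same storage as e8m0 scales -- a zero-copy dtype view.
scale = bits.view(torch.float8_e8m0fnu)

# Decode on the raw bits: each scale is 2 ** (bits - 127).
decoded = 2.0 ** (bits.to(torch.float32) - 127)
print(decoded)                  # tensor([0.5000, 1.0000, 8.0000])

# Round-trip back to the exponent bits, as the updated triton wrappers do.
print(scale.view(torch.uint8))  # tensor([126, 127, 130], dtype=torch.uint8)
```

This is also why the tests below can replace comparisons against E8M0_EXPONENT_NAN_VAL with torch.isnan on the scale tensor.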

File tree

4 files changed: +19 -10 lines changed

test/prototype/mx_formats/test_mx_tensor.py

+8 -6
@@ -19,7 +19,6 @@
 )
 from torchao.prototype.mx_formats.custom_cast import pack_uint4, pack_uint6
 from torchao.prototype.mx_formats.mx_tensor import (
-    E8M0_EXPONENT_NAN_VAL,
     MXTensor,
     ScaleCalculationMode,
     to_dtype,
@@ -321,8 +320,8 @@ def test_exponent_nan_in(elem_dtype):
     )
     block_size = 4
     tensor_mx = MXTensor.to_mx(tensor_hp, elem_dtype, block_size)
-    assert torch.all(tensor_mx._scale_e8m0[0] == E8M0_EXPONENT_NAN_VAL)
-    assert not torch.any(tensor_mx._scale_e8m0[1:] == E8M0_EXPONENT_NAN_VAL)
+    assert torch.all(torch.isnan(tensor_mx._scale_e8m0[0]))
+    assert not torch.any(torch.isnan(tensor_mx._scale_e8m0[1:]))


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -332,8 +331,11 @@ def test_exponent_nan_out(elem_dtype, pack_fp6):
     """
     If block exponent value is NaN, the MX tensor block value is NaN
     """
-    scale_e8m0_bits = torch.tensor(
-        [E8M0_EXPONENT_NAN_VAL, 23], dtype=torch.uint8, device="cuda"
+    if pack_fp6 and elem_dtype not in (DTYPE_FP6_E2M3, DTYPE_FP6_E3M2):
+        pytest.skip("invalid configuration")
+
+    scale_e8m0 = torch.tensor(
+        [float("nan"), 1.0], dtype=torch.float8_e8m0fnu, device="cuda"
     )

     block_size = 4
@@ -359,7 +361,7 @@ def test_exponent_nan_out(elem_dtype, pack_fp6):
     block_size = 4
     use_fp4_custom_triton_dequant_kernel = False
     tensor_mx = MXTensor(
-        scale_e8m0_bits,
+        scale_e8m0,
         data_bits,
         elem_dtype,
         block_size,
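
The old and new assertions above check the same bit pattern: a scale whose raw uint8 value is E8M0_EXPONENT_NAN_VAL (255) is exactly an e8m0 NaN. A hedged sketch of that equivalence (the constant is re-declared here because the diff removes it from the test imports):

```python
import torch

E8M0_EXPONENT_NAN_VAL = 255  # all-ones exponent byte

scale_e8m0 = torch.tensor([255, 130, 127], dtype=torch.uint8).view(torch.float8_e8m0fnu)

# Old-style check on the raw exponent bits...
old_style = scale_e8m0.view(torch.uint8) == E8M0_EXPONENT_NAN_VAL
print(old_style)  # tensor([ True, False, False])

# ...which the updated test expresses as torch.isnan(scale_e8m0) directly on
# the e8m0 tensor (exercised on CUDA in the test above).
```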

torchao/prototype/mx_formats/custom_cast.py

+3
@@ -745,6 +745,7 @@ def triton_f4_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)
     assert TORCH_VERSION_AT_LEAST_2_4, "unsupported"
     new_shape = (*x.shape[:-1], x.shape[-1] * 2)
     output = torch.empty(*new_shape, device=x.device, dtype=torch.bfloat16)
@@ -861,6 +862,7 @@ def triton_f6_e2m3_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)

     packed_mx_block_size = 3 * mx_block_size // 4

@@ -902,6 +904,7 @@ def triton_f6_e3m2_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)

     packed_mx_block_size = 3 * mx_block_size // 4
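
The Triton kernels behind these wrappers were written against the raw exponent bits, so each wrapper now reinterprets the incoming e8m0 scale as uint8 up front; .view only retags the dtype, it does not copy or convert. A standalone sketch of the pattern (the function name and shapes here are hypothetical, not torchao APIs):

```python
import torch

def scaled_dequant_sketch(x: torch.Tensor, s_e8m0: torch.Tensor) -> torch.Tensor:
    # Accept an e8m0 scale at the API boundary, but do the decode on raw bits,
    # mirroring the s_e8m0.view(torch.uint8) lines added above.
    s_bits = s_e8m0.view(torch.uint8)                # zero-copy reinterpretation
    scale = 2.0 ** (s_bits.to(torch.float32) - 127)  # 2 ** (bits - bias)
    return (x.to(torch.float32) * scale).to(torch.bfloat16)

x = torch.randn(4)
s = torch.tensor([128], dtype=torch.uint8).view(torch.float8_e8m0fnu)  # scale of 2.0
print(scaled_dequant_sketch(x, s))
```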

torchao/prototype/mx_formats/mx_linear.py

+3 -3
@@ -92,7 +92,7 @@ def backward(ctx, grad_output_hp: torch.Tensor):
             weight_hp, block_size
         )
         weight_mx_dim1 = MXTensor(
-            weight_mx_dim1_scale.view(torch.uint8).reshape(-1),
+            weight_mx_dim1_scale.reshape(-1),
             weight_mx_dim1_data.t(),
             w_elem_dtype,
             block_size,
@@ -121,7 +121,7 @@ def backward(ctx, grad_output_hp: torch.Tensor):
             grad_output_hp_r, block_size
         )
         grad_output_mx_dim1 = MXTensor(
-            grad_output_mx_dim1_scale.view(torch.uint8).reshape(-1),
+            grad_output_mx_dim1_scale.reshape(-1),
             grad_output_mx_dim1_data.t(),
             grad_elem_dtype,
             block_size,
@@ -143,7 +143,7 @@ def backward(ctx, grad_output_hp: torch.Tensor):
             input_hp_r, block_size
         )
         input_t_mx_dim0_tmp = MXTensor(
-            input_t_mx_dim0_tmp_scale.view(torch.uint8).reshape(-1),
+            input_t_mx_dim0_tmp_scale.reshape(-1),
             input_t_mx_dim0_tmp_data.t(),
             in_elem_dtype,
             block_size,
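
The .view(torch.uint8) calls can be dropped here because to_mx (see the mx_tensor.py change below) now returns the scale already tagged as torch.float8_e8m0fnu, which is exactly what the updated MXTensor constructor assert expects. A hedged usage sketch, assuming CUDA and a torchao build that includes this commit:

```python
import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor

x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)
x_mx = MXTensor.to_mx(x, torch.float8_e4m3fn, 32)  # elem_dtype, block_size

# The scale comes back as e8m0 directly; only a reshape is needed before
# passing it to another MXTensor constructor call, no uint8 round-trip.
assert x_mx._scale_e8m0.dtype == torch.float8_e8m0fnu
```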

torchao/prototype/mx_formats/mx_tensor.py

+5 -1
@@ -326,10 +326,12 @@ def to_mx(
     else:
         raise AssertionError("unsupported")

+    scale_e8m0_biased = scale_e8m0_biased.view(torch.float8_e8m0fnu)
     return scale_e8m0_biased, data_lp


 def get_fp_scale(scale_e8m0):
+    scale_e8m0 = scale_e8m0.view(torch.uint8)
     s_offset = scale_e8m0.to(torch.int16) - E8M0_EXPONENT_BIAS
     # TODO(later): it would be nice if there was a way to do the 2^x operation
     # in PyTorch without creating a tensor of twos
@@ -562,7 +564,9 @@ def __new__(
             dtype=orig_dtype,
             device=data_bits.device,
         )
-        assert scale_e8m0_bits.dtype == torch.uint8, "unsupported"
+        assert (
+            scale_e8m0_bits.dtype == torch.float8_e8m0fnu
+        ), f"scale_e8m0_bits.dtype must be `torch.float8_e8m0fnu`, got {scale_e8m0_bits.dtype}"
         assert len(scale_e8m0_bits.shape) == 1, "unsupported"
         assert data_bits.dtype in (
             torch.float8_e4m3fn,
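
Putting the mx_tensor.py pieces together: to_mx now hands back an e8m0-typed scale, and get_fp_scale views it back to uint8 before doing the exponent arithmetic. A standalone sketch of that decode path, reconstructed from the visible lines (the twos-tensor step is assumed, following the TODO comment in the diff):

```python
import torch

E8M0_EXPONENT_BIAS = 127

def get_fp_scale_sketch(scale_e8m0: torch.Tensor) -> torch.Tensor:
    # View the e8m0 scale as raw exponent bits, then decode 2 ** (bits - bias).
    bits = scale_e8m0.view(torch.uint8)
    s_offset = bits.to(torch.int16) - E8M0_EXPONENT_BIAS
    # 2 ** s_offset computed via a tensor of twos, per the TODO above.
    twos = torch.full_like(s_offset, 2, dtype=torch.float32)
    return torch.pow(twos, s_offset.to(torch.float32))

scale = torch.tensor([124, 127, 131], dtype=torch.uint8).view(torch.float8_e8m0fnu)
print(get_fp_scale_sketch(scale))  # tensor([ 0.1250,  1.0000, 16.0000])
```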

0 commit comments
