Commit 4d8998d
print MX config when printing MXLinear and MXInferenceLinear
Summary:

Adds the relevant MX config options to the string representation of MX linear objects, to make debugging easier. Example:

```
MXLinear(in_features=4096, out_features=4096, bias=False, bl_sz=32, lp_dtype=f8e4m3, kernel=cublas, use_fp8_dim1_cast_triton_kernel=True)
```

Test Plan: CI

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: ce0d3fc
ghstack-comment-id: 2749522655
Pull Request resolved: #1947
1 parent 9cb48b5 commit 4d8998d
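For illustration, a minimal sketch of the debugging flow this enables, mirroring the tests added below. The import paths are assumptions based on how the test file uses these helpers, and the exact `lp_dtype` shown depends on `MXLinearConfig`'s default `elem_dtype`; with the default config, the tests only pin down `bl_sz=32` and `kernel=emulated`:

```python
import torch.nn as nn

# Assumed import locations; the diff below only shows config.py and mx_linear.py
from torchao.prototype.mx_formats.config import MXLinearConfig
from torchao.prototype.mx_formats.mx_linear import swap_linear_with_mx_linear

# Swap the float linear for an MXLinear, then print the model; the new
# extra_repr() surfaces the MX config inline with the usual Linear fields.
m = nn.Sequential(nn.Linear(32, 32))
swap_linear_with_mx_linear(m, config=MXLinearConfig())
print(m)
# e.g. MXLinear(in_features=32, out_features=32, bias=True, bl_sz=32,
#      lp_dtype=f8e4m3, kernel=emulated)
```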

File tree: 4 files changed, +54 −0 lines changed

test/prototype/mx_formats/test_mx_linear.py

Lines changed: 18 additions & 0 deletions

```diff
@@ -401,3 +401,21 @@ def test_filter_fn():
     swap_linear_with_mx_inference_linear(m2, config=config, filter_fn=filter_fn)  # noqa: E501
     assert type(m2[0]) == MXInferenceLinear
     assert type(m2[1]) == torch.nn.Linear
+
+
+def test_training_print_str():
+    m = nn.Sequential(nn.Linear(32, 32))
+    config = MXLinearConfig()
+    swap_linear_with_mx_linear(m, config=config)
+    s = str(m)
+    assert "bl_sz=32" in s
+    assert "kernel=emulated" in s
+
+
+def test_inference_print_str():
+    m = nn.Sequential(nn.Linear(32, 32))
+    config = MXLinearConfig()
+    swap_linear_with_mx_inference_linear(m, config=config)
+    s = str(m)
+    assert "bl_sz=32" in s
+    assert "kernel=emulated" in s
```

torchao/prototype/mx_formats/config.py

Lines changed: 20 additions & 0 deletions

```diff
@@ -12,6 +12,7 @@

 from torchao.prototype.mx_formats.constants import (
     DTYPE_FP4,
+    DTYPE_TO_SHORT_STR,
     SUPPORTED_ELEM_DTYPES,
 )

@@ -143,3 +144,22 @@ def from_recipe_name(
             )
         else:
             raise AssertionError(f"unknown recipe_name {recipe_name}")
+
+    def short_str(self) -> str:
+        """
+        Returns a concise representation of the current config.
+        """
+        s = f"bl_sz={self.block_size}, lp_dtype={DTYPE_TO_SHORT_STR[self.elem_dtype]}"
+        if self.elem_dtype_weight_override is not None:
+            s += (
+                f", lp_w_override={DTYPE_TO_SHORT_STR[self.elem_dtype_weight_override]}"
+            )
+        if self.elem_dtype_grad_output_override is not None:
+            s += f", lp_go_override={DTYPE_TO_SHORT_STR[self.elem_dtype_grad_output_override]}"
+        s += f", kernel={self.gemm_kernel_choice.value}"
+        if self.use_fp8_dim1_cast_triton_kernel:
+            s += ", use_fp8_dim1_cast_triton_kernel=True"
+        if self.use_fp4_custom_triton_dequant_kernel:
+            s += ", use_fp4_custom_triton_dequant_kernel=True"
+        # TODO(future PR): split training from inference and add fp6 here
+        return s
```
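As a quick usage sketch of `short_str()` on its own (hedged: the `lp_dtype` short name shown assumes the default `elem_dtype` is `torch.float8_e4m3fn`, which this diff does not confirm):

```python
from torchao.prototype.mx_formats.config import MXLinearConfig

config = MXLinearConfig()
# The tests in this commit pin down "bl_sz=32" and "kernel=emulated"
# for the default config; the lp_dtype field is an assumption here.
print(config.short_str())
# e.g. "bl_sz=32, lp_dtype=f8e4m3, kernel=emulated"
```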

torchao/prototype/mx_formats/constants.py

Lines changed: 8 additions & 0 deletions

```diff
@@ -22,6 +22,14 @@
     DTYPE_FP4,
 ]

+DTYPE_TO_SHORT_STR = {
+    torch.float8_e4m3fn: "f8e4m3",
+    torch.float8_e5m2: "f8e5m2",
+    DTYPE_FP6_E2M3: "f6e2m3",
+    DTYPE_FP6_E3M2: "f6e3m2",
+    DTYPE_FP4: "f4e2m1",
+}
+
 F8E4M3_MAX = torch.finfo(torch.float8_e4m3fn).max  # 448.0
 F8E5M2_MAX = torch.finfo(torch.float8_e5m2).max  # 57344.0
```
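`DTYPE_TO_SHORT_STR` simply maps each supported element dtype to the compact token used by `short_str()`; a trivial check, taken directly from the table above:

```python
import torch

from torchao.prototype.mx_formats.constants import DTYPE_TO_SHORT_STR

# torch.float8_e4m3fn renders as "f8e4m3", matching the commit message example
assert DTYPE_TO_SHORT_STR[torch.float8_e4m3fn] == "f8e4m3"
```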

torchao/prototype/mx_formats/mx_linear.py

Lines changed: 8 additions & 0 deletions

```diff
@@ -213,6 +213,10 @@ def forward(self, x):
         y = y + self.bias
         return y

+    def extra_repr(self):
+        s = f"{super().extra_repr()}, {self.config.short_str()}"
+        return s
+

 class MXInferenceLinear(torch.nn.Linear):
     """

@@ -255,6 +259,10 @@ def forward(self, x):
         y = F.linear(x, w_hp, self.bias)
         return y

+    def extra_repr(self):
+        s = f"{super().extra_repr()}, {self.config.short_str()}"
+        return s
+

 def replace_with_custom_fn_if_matches_filter(
     model, replacement_fn, filter_fn, cur_fqn=""
```
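These overrides work because `torch.nn.Module.__repr__` appends the output of `extra_repr()` inside the module's printed parentheses, so no custom `__repr__` is needed. A standalone sketch with a hypothetical `Demo` subclass (not part of this PR):

```python
import torch

class Demo(torch.nn.Linear):
    # nn.Linear.extra_repr() already returns "in_features=..., out_features=...,
    # bias=..."; appending to it is exactly the pattern MXLinear and
    # MXInferenceLinear use with config.short_str().
    def extra_repr(self):
        return f"{super().extra_repr()}, bl_sz=32, kernel=emulated"

print(Demo(4, 4, bias=False))
# Demo(in_features=4, out_features=4, bias=False, bl_sz=32, kernel=emulated)
```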
