1 file changed: +14 −3 lines changed
@@ -8,7 +8,7 @@
 
 import torch
 import torch.distributed as dist
-from torch.distributed._functional_collectives import AsyncCollectiveTensor, all_reduce
+from torch.distributed._functional_collectives import all_reduce, AsyncCollectiveTensor
 
 from torchao.float8.config import Float8LinearConfig, ScalingGranularity, ScalingType
 
@@ -120,16 +120,27 @@ def tensor_to_amax(
 
 @torch.no_grad()
 def tensor_to_scale(
-    x: torch.Tensor,
+    hp_tensor: torch.Tensor,
     float8_dtype: torch.dtype,
     reduce_amax: bool = False,
     device_mesh=None,
     scaling_granularity: ScalingGranularity = ScalingGranularity.TENSORWISE,
     axiswise_dim: Optional[int] = None,
     power_of_2_scale: bool = False,
 ) -> torch.Tensor:
+    """
+    Compute scaling factor for the given high precision tensor.
+
+    Args:
+        hp_tensor: the tensor to convert
+        float8_dtype: the float8 dtype to use
+        reduce_amax: whether to reduce the max(abs(hp_tensor)) value across distributed ranks
+        scaling_granularity: defines the scaling granularity
+        axiswise_dim: if axiswise granularity is used, defines the dim to scale across
+        power_of_2_scale: if true, round scaling factor down to the nearest power of 2.
+    """
     amax = tensor_to_amax(
-        x,
+        hp_tensor,
         reduce_amax,
         device_mesh,
         scaling_granularity,
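
The docstring added in this diff describes an amax-based scale: take max(abs(hp_tensor)) at the requested granularity and, optionally, round the resulting scale down to a power of 2. The sketch below only illustrates that idea in plain PyTorch and is not torchao's implementation; the helper name `example_tensor_to_scale`, the `scale = finfo(float8_dtype).max / amax` convention, and the small clamp guarding against division by zero are assumptions, and it requires a PyTorch build that ships the float8 dtypes.

```python
from typing import Optional

import torch


def example_tensor_to_scale(
    hp_tensor: torch.Tensor,
    float8_dtype: torch.dtype = torch.float8_e4m3fn,
    axiswise_dim: Optional[int] = None,  # None -> tensorwise granularity
    power_of_2_scale: bool = False,
) -> torch.Tensor:
    # amax at the requested granularity
    if axiswise_dim is None:
        amax = hp_tensor.abs().max()  # tensorwise: a single scalar amax
    else:
        amax = hp_tensor.abs().amax(dim=axiswise_dim, keepdim=True)  # axiswise amax
    # (with reduce_amax, a distributed MAX all-reduce of `amax` would go here)
    amax = amax.to(torch.float64).clamp(min=1e-12)  # assumed guard against division by zero
    # assumed convention: the scale maps the observed amax onto the float8 max value
    scale = torch.finfo(float8_dtype).max / amax
    if power_of_2_scale:
        # round the scale down to the nearest power of 2
        scale = torch.exp2(torch.floor(torch.log2(scale)))
    return scale.to(torch.float32)


x = torch.randn(16, 32)
print(example_tensor_to_scale(x, power_of_2_scale=True))   # scalar tensorwise scale, a power of 2
print(example_tensor_to_scale(x, axiswise_dim=-1).shape)   # torch.Size([16, 1]), one scale per row
```

The usual motivation for the power-of-2 option is that multiplying by a power-of-2 scale only shifts the exponent of the scaled values, so the scaling itself introduces no mantissa rounding error and is exactly invertible.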