Commit 15888ca

Support pre-quantization via torchao quantize_ (#10293)
Checkpoints saved with torchao quantized tensor subclasses can now be loaded with this PR.
1 parent 2a2f958 commit 15888ca
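
For context, a pre-quantized checkpoint of this kind could be produced roughly as follows. This is a minimal sketch, not part of the commit: the toy module, the int8_weight_only config, and the output file name are illustrative assumptions.

import torch
from torchao.quantization import int8_weight_only, quantize_

# Illustrative stand-in for the llama Transformer used in the examples.
model = torch.nn.Sequential(torch.nn.Linear(64, 64))

# quantize_ swaps eligible weights for torchao quantized tensor subclasses in place.
quantize_(model, int8_weight_only())

# The saved state_dict now contains quantized subclass tensors; this commit
# teaches the llama example and the export builder to handle such checkpoints.
torch.save(model.state_dict(), "llama_int8_checkpoint.pth")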

File tree: 2 files changed, +10 −0 lines

examples/models/llama/model.py (+4)
@@ -18,6 +18,7 @@
 from executorch.examples.models.llama.llama_transformer import Transformer
 
 from executorch.examples.models.llama.model_args import ModelArgs
+from torchao.utils import TorchAOBaseTensor
 
 try:
     from .fairseq2 import convert_to_llama_checkpoint

@@ -257,6 +258,9 @@ def __init__(self, **kwargs):
                 strict=False,
                 assign=True,
             )  # self.model_ = Transformer(gptconf)
+            for param in self.model_.parameters():
+                if isinstance(param, TorchAOBaseTensor):
+                    param.requires_grad = False
         else:
             print("Checkpoint not provided, defaulting weights to zeros.")
             self.model_.to_empty(device="cpu")
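
The requires_grad reset matters because load_state_dict(..., assign=True) keeps the checkpoint's tensor subclasses as the module's parameters rather than copying values into the existing float parameters. A minimal sketch of that load path, assuming a checkpoint produced with torchao's int8_weight_only config; the toy module is illustrative, not the actual llama Transformer:

import torch
from torchao.quantization import int8_weight_only, quantize_
from torchao.utils import TorchAOBaseTensor

# Produce a quantized state_dict (stand-in for a pre-quantized llama checkpoint).
src = torch.nn.Sequential(torch.nn.Linear(16, 16))
quantize_(src, int8_weight_only())
checkpoint = src.state_dict()

# assign=True keeps the checkpoint's torchao subclass tensors as the parameters.
dst = torch.nn.Sequential(torch.nn.Linear(16, 16))
dst.load_state_dict(checkpoint, strict=False, assign=True)

# Quantized subclass parameters are inference-only, so freeze them,
# mirroring the model.py change above.
for param in dst.parameters():
    if isinstance(param, TorchAOBaseTensor):
        param.requires_grad = False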

extension/llm/export/builder.py (+6)
@@ -41,6 +41,7 @@
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 from torch.export import export_for_training, ExportedProgram
 from torch.nn.attention import SDPBackend
+from torchao.utils import unwrap_tensor_subclass
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)

@@ -199,6 +200,11 @@ def _get_edge_config(self) -> EdgeCompileConfig:
         return edge_config
 
     def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
+        if module is not None:
+            unwrap_tensor_subclass(module)
+        else:
+            unwrap_tensor_subclass(self.model)
+
         dynamic_shape = self._get_dynamic_shape()
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
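
Here unwrap_tensor_subclass rewrites the tensor-subclass weights into a representation that torch.export can trace before the export step below. A rough sketch of that step under the same assumptions as above (toy module and int8_weight_only config are illustrative, not the actual model handled by the builder):

import torch
from torchao.quantization import int8_weight_only, quantize_
from torchao.utils import unwrap_tensor_subclass

# Illustrative stand-in for the model passed into _export.
m = torch.nn.Sequential(torch.nn.Linear(32, 32))
quantize_(m, int8_weight_only())

# Convert the subclass weights into plain tensors torch.export can trace.
unwrap_tensor_subclass(m)

ep = torch.export.export_for_training(m, (torch.randn(1, 32),))
print(ep)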
