
Commit 6887e93

Fix tests and lint
Parent commit: d9b549f

4 files changed: 38 additions, 24 deletions

examples/models/llama/export_llama_lib.py

Lines changed: 9 additions & 4 deletions
@@ -534,6 +534,11 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str:
     return return_val
 
 
+def get_default_llm_config() -> DictConfig:
+    default_args = build_args_parser().parse_args([])
+    return _convert_args_to_config(default_args)
+
+
 def _convert_args_to_config(args: argparse.Namespace) -> DictConfig:
     """Convert argparse.Namespace to DictConfig."""
     # Create a dictionary from args
@@ -670,7 +675,9 @@ def export_llama(args: Union[argparse.Namespace, DictConfig]) -> str:
             raise ValueError(
                 f"Converting weights to meta format for {config.model.name} is not yet supported"
             )
-        config.model.checkpoint = download_and_convert_hf_checkpoint(repo_id, convert_weights)
+        config.model.checkpoint = download_and_convert_hf_checkpoint(
+            repo_id, convert_weights
+        )
 
     if config.misc.profile_path is not None:
         try:
@@ -711,9 +718,7 @@ def _prepare_for_llama_export(config: DictConfig) -> LLMEdgeManager:
         if config.model.checkpoint_dir
         else None
     )
-    params_path = (
-        canonical_path(config.model.params) if config.model.params else None
-    )
+    params_path = canonical_path(config.model.params) if config.model.params else None
     output_dir_path = canonical_path(config.export.output_dir, dir=True)
     weight_type = (
         WeightType.FAIRSEQ2 if config.model.type == "FAIRSEQ2" else WeightType.LLAMA
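
The new get_default_llm_config() helper turns the parser defaults into a fully populated DictConfig, so callers can start from a complete config and override individual fields instead of mutating an argparse.Namespace. A minimal sketch of that call pattern, mirroring the updated test later in this commit (the nested kv_cache.* and misc.* keys are taken from that test, not invented here):

    from executorch.examples.models.llama.export_llama_lib import (
        _export_llama,
        get_default_llm_config,
    )

    # Build a DictConfig carrying every default from build_args_parser().
    config = get_default_llm_config()

    # Override only the fields relevant to this run.
    config.kv_cache.use_kv_cache = True
    config.kv_cache.use_sdpa_with_kv_cache = True
    config.misc.verbose = True

    builder = _export_llama(config)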

examples/models/llama/source_transformation/quantize.py

Lines changed: 14 additions & 8 deletions
@@ -783,7 +783,9 @@ def forward(self, indices: torch.Tensor) -> torch.Tensor:
 ############################ Source Transform Start #######################
 
 
-def get_quant_embedding_transform(config: DictConfig, dtype_override: Optional[DType] = None):
+def get_quant_embedding_transform(
+    config: DictConfig, dtype_override: Optional[DType] = None
+):
     if config.quantization.embedding_quantize.startswith("torchao:"):
         from torchao.experimental.quant_api import (
             EmbeddingQuantizer,
@@ -850,15 +852,15 @@ def get_quant_weight_transform(
     # If these optional args are None, don't provide them to quantize().
     quant_args = {}
     if config.quantization.group_size is not None:
-        quant_args['group_size'] = config.quantization.group_size
+        quant_args["group_size"] = config.quantization.group_size
     if config.calibration.tasks is not None:
-        quant_args['calibration_tasks'] = OmegaConf.to_container(config.calibration.tasks)
+        quant_args["calibration_tasks"] = OmegaConf.to_container(
+            config.calibration.tasks
+        )
     if config.calibration.limit is not None:
-        quant_args['calibration_limit'] = config.calibration.limit
+        quant_args["calibration_limit"] = config.calibration.limit
     if config.calibration.seq_length is not None:
-        quant_args['calibration_seq_length'] = config.calibration.seq_length
-
-
+        quant_args["calibration_seq_length"] = config.calibration.seq_length
 
     group_size = config.quantization.group_size
     calibration_tasks = config.calibration.tasks
@@ -871,11 +873,15 @@ def get_quant_weight_transform(
         qmode=config.quantization.mode,
         computation_dtype=computation_dtype,
         checkpoint_dtype=checkpoint_dtype,
-        checkpoint_path=(Path(path) if (path := config.model.checkpoint) is not None else None),
+        checkpoint_path=(
+            Path(path) if (path := config.model.checkpoint) is not None else None
+        ),
         tokenizer_path=(
             Path(path) if (path := config.model.tokenizer_path) is not None else None
         ),
     )
+
+
 def _load_torchao_aten_lib(libname):
     import glob
     import os
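
The OmegaConf.to_container() call in the hunk above matters because values read from a DictConfig come back as ListConfig/DictConfig wrappers rather than plain Python containers; converting before passing them on keeps downstream code that expects a real list working. A small, self-contained sketch of the pattern, assuming an illustrative config shape rather than the project's real schema:

    from omegaconf import OmegaConf

    # Illustrative config; in export_llama_lib the real one comes from
    # _convert_args_to_config().
    config = OmegaConf.create(
        {"calibration": {"tasks": ["wikitext"], "limit": 5, "seq_length": 128}}
    )

    quant_args = {}
    if config.calibration.tasks is not None:
        # ListConfig -> plain list, so callees that expect list/tuple behave as usual.
        quant_args["calibration_tasks"] = OmegaConf.to_container(config.calibration.tasks)
    if config.calibration.limit is not None:
        quant_args["calibration_limit"] = config.calibration.limit

    print(type(quant_args["calibration_tasks"]))  # <class 'list'>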

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 11 additions & 8 deletions
@@ -4,14 +4,18 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
 import unittest
 
 from executorch.devtools.backend_debug import get_delegation_info
 from executorch.examples.models.llama.export_llama_lib import (
-    export_llama,
-    build_args_parser,
+    _export_llama,
+    get_default_llm_config,
 )
 
+FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=FORMAT, force=True)
+
 UNWANTED_OPS = [
     "aten_permute_copy_default",
     "aten_transpose_copy_default",
@@ -34,13 +38,12 @@ def test_has_expected_ops_and_op_counts(self):
         # we cannot test quantization args in this way
         # since quantization requires promoting meta tensors
         # to device=cpu, which requires real weights.
-        parser = build_args_parser()
-        args = parser.parse_args([])
-        args.use_sdpa_with_kv_cache = True
-        args.use_kv_cache = True
-        args.verbose = True
+        export_config = get_default_llm_config()
+        export_config.kv_cache.use_sdpa_with_kv_cache = True
+        export_config.kv_cache.use_kv_cache = True
+        export_config.misc.verbose = True
 
-        builder = export_llama(args)
+        builder = _export_llama(export_config)
         graph_module = builder.edge_manager.exported_program().graph_module
         delegation_info = get_delegation_info(graph_module)
 

extension/llm/export/builder.py

Lines changed: 4 additions & 4 deletions
@@ -203,7 +203,7 @@ def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            if hasattr(self.args, "qnn") and self.args.qnn:
+            if self.args.backend.qnn.enabled:
                 # TODO: this is temporary, as qnn flow does not work with new, non-functional export IR.
                 # See issue: https://github.com/pytorch/executorch/issues/7373
 
@@ -249,8 +249,8 @@ def export(self) -> "LLMEdgeManager":
         # Persisting those changes back to an ExportedProgram will require
         # an additional export().
         self.pre_autograd_graph_module = exported_module.module()
-        if hasattr(self.args, "export_only") and self.args.export_only:
-            torch.export.save(exported_module, self.args.output_name)
+        if self.args.export.export_only:
+            torch.export.save(exported_module, self.args.export.output_name)
         return self
 
     def run_canonical_optimizations(self):
@@ -414,7 +414,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
         self.export()
 
         override_export_behaviour = contextlib.nullcontext()
-        if hasattr(self.args, "qnn") and self.args.qnn:
+        if self.args.backend.qnn.enabled:
             override_export_behaviour = patch.object(
                 torch._utils_internal,
                 "export_training_ir_rollout_check",
