
Commit 9522e19

Fix Llava test
1 parent 6887e93 commit 9522e19

File tree

5 files changed: 15 additions & 20 deletions

examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 1 deletion
@@ -1291,7 +1291,7 @@ def _load_llama_model(
             model.vocab_size,
             metadata_str,
         ),
-        args=config,  # TODO: Rename builder args field to config.
+        config=config,  # TODO: Rename builder args field to config.
     )

examples/models/llama/source_transformation/quantize.py

Lines changed: 0 additions & 5 deletions
@@ -862,11 +862,6 @@ def get_quant_weight_transform(
     if config.calibration.seq_length is not None:
         quant_args["calibration_seq_length"] = config.calibration.seq_length

-    group_size = config.quantization.group_size
-    calibration_tasks = config.calibration.tasks
-    calibration_limit = config.calibration.limit
-    calibration_seq_length = config.calibration.seq_length
-
     return partial(
         quantize,
         **quant_args,

examples/models/llama3_2_vision/runner/eager.py

Lines changed: 6 additions & 6 deletions
@@ -22,18 +22,18 @@ class EagerLlamaRunner(TorchTuneLlamaRunner):
     Runs llama in eager mode with provided checkpoint file.
     """

-    def __init__(self, args):
-        with open(args.params, "r") as f:
+    def __init__(self, config):
+        with open(config.model.params, "r") as f:
             params = json.loads(f.read())
         super().__init__(
-            tokenizer_path=args.tokenizer_path,
-            max_seq_len=args.max_seq_length,
+            tokenizer_path=config.model.tokenizer_path,
+            max_seq_len=config.sequence.max_seq_length,
             max_batch_size=1,
-            use_kv_cache=args.use_kv_cache,
+            use_kv_cache=config.kv_cache.use_kv_cache,
             vocab_size=params["vocab_size"],
             device="cuda" if torch.cuda.is_available() else "cpu",
         )
-        manager: LLMEdgeManager = _prepare_for_llama_export(args)
+        manager: LLMEdgeManager = _prepare_for_llama_export(config)
         self.model = manager.model.eval().to(device=self.device)

     def forward(
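Note: the runner now reads nested config groups rather than flat argparse attributes. Below is a minimal sketch of the shape implied by the fields referenced in this hunk, built with omegaconf; the file paths and values are placeholders, and the real config likely carries many more groups needed by _prepare_for_llama_export.

from omegaconf import OmegaConf

# Hypothetical config covering only the keys EagerLlamaRunner.__init__ touches here.
config = OmegaConf.create(
    {
        "model": {
            "params": "params.json",          # placeholder path
            "tokenizer_path": "tokenizer.model",
        },
        "sequence": {"max_seq_length": 2048},
        "kv_cache": {"use_kv_cache": True},
    }
)

# EagerLlamaRunner(config) would then resolve, for example:
print(config.model.tokenizer_path)      # "tokenizer.model"
print(config.sequence.max_seq_length)   # 2048
print(config.kv_cache.use_kv_cache)     # True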

examples/models/llava/export_llava.py

Lines changed: 0 additions & 1 deletion
@@ -93,7 +93,6 @@ def forward(self, input_pos, embeddings):
         use_kv_cache=True,
         example_inputs=(torch.tensor([0], dtype=torch.int64), embeddings),
         dynamic_shapes=dynamic_shapes,
-        args=llava.text_model_args,
     )

     dtype_override = DType.fp32

extension/llm/export/builder.py

Lines changed: 8 additions & 7 deletions
@@ -21,6 +21,7 @@
     DuplicateDynamicQuantChainPass,
 )
 from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass
+
 from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower
 from executorch.exir.backend.partitioner import Partitioner

@@ -33,9 +34,9 @@
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass

 from executorch.extension.export_util.utils import export_to_edge, save_pte_program
-
 from executorch.extension.llm.export.export_passes import RemoveRedundantTransposes
 from executorch.extension.llm.tokenizer.utils import get_tokenizer
+from omegaconf import DictConfig
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torch.ao.quantization.quantizer import Quantizer
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer

@@ -87,7 +88,7 @@ def __init__(
         use_kv_cache,
         example_inputs,
         example_kwarg_inputs: Optional[Dict] = None,
-        args: Optional[Any] = None,
+        config: Optional[DictConfig] = None,
         enable_dynamic_shape: bool = False,
         generate_full_logits: bool = False,
         calibration_tasks: Optional[List[str]] = None,

@@ -121,7 +122,7 @@ def __init__(
         self.output_dir = "."
         self.dynamic_shapes = dynamic_shapes
         self._saved_pte_filename = None
-        self.args = args
+        self.config = config
         self.calibration_tasks = calibration_tasks
         self.calibration_limit = calibration_limit
         self.calibration_seq_length = calibration_seq_length

@@ -203,7 +204,7 @@ def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            if self.args.backend.qnn.enabled:
+            if self.config.backend.qnn.enabled:
                 # TODO: this is temporary, as qnn flow does not work with new, non-functional export IR.
                 # See issue: https://github.com/pytorch/executorch/issues/7373

@@ -249,8 +250,8 @@ def export(self) -> "LLMEdgeManager":
         # Persisting those changes back to an ExportedProgram will require
         # an additional export().
         self.pre_autograd_graph_module = exported_module.module()
-        if self.args.export.export_only:
-            torch.export.save(exported_module, self.args.export.output_name)
+        if self.config.export.export_only:
+            torch.export.save(exported_module, self.config.export.output_name)
         return self

     def run_canonical_optimizations(self):

@@ -414,7 +415,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
             self.export()

         override_export_behaviour = contextlib.nullcontext()
-        if self.args.backend.qnn.enabled:
+        if self.config.backend.qnn.enabled:
             override_export_behaviour = patch.object(
                 torch._utils_internal,
                 "export_training_ir_rollout_check",
