
Commit 9522e19

Fix Llava test
1 parent 6887e93 commit 9522e19

File tree

5 files changed: 15 additions & 20 deletions

examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 1 deletion
@@ -1291,7 +1291,7 @@ def _load_llama_model(
             model.vocab_size,
             metadata_str,
         ),
-        args=config,  # TODO: Rename builder args field to config.
+        config=config,  # TODO: Rename builder args field to config.
     )

examples/models/llama/source_transformation/quantize.py

Lines changed: 0 additions & 5 deletions
@@ -862,11 +862,6 @@ def get_quant_weight_transform(
     if config.calibration.seq_length is not None:
         quant_args["calibration_seq_length"] = config.calibration.seq_length

-    group_size = config.quantization.group_size
-    calibration_tasks = config.calibration.tasks
-    calibration_limit = config.calibration.limit
-    calibration_seq_length = config.calibration.seq_length
-
     return partial(
         quantize,
         **quant_args,

examples/models/llama3_2_vision/runner/eager.py

Lines changed: 6 additions & 6 deletions
@@ -22,18 +22,18 @@ class EagerLlamaRunner(TorchTuneLlamaRunner):
     Runs llama in eager mode with provided checkpoint file.
     """

-    def __init__(self, args):
-        with open(args.params, "r") as f:
+    def __init__(self, config):
+        with open(config.model.params, "r") as f:
             params = json.loads(f.read())
         super().__init__(
-            tokenizer_path=args.tokenizer_path,
-            max_seq_len=args.max_seq_length,
+            tokenizer_path=config.model.tokenizer_path,
+            max_seq_len=config.sequence.max_seq_length,
             max_batch_size=1,
-            use_kv_cache=args.use_kv_cache,
+            use_kv_cache=config.kv_cache.use_kv_cache,
             vocab_size=params["vocab_size"],
             device="cuda" if torch.cuda.is_available() else "cpu",
         )
-        manager: LLMEdgeManager = _prepare_for_llama_export(args)
+        manager: LLMEdgeManager = _prepare_for_llama_export(config)
         self.model = manager.model.eval().to(device=self.device)

     def forward(
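Note: the runner now reads nested config groups rather than flat argparse attributes. Below is a minimal sketch of the shape implied by the fields referenced in this hunk, built with omegaconf; the file paths and values are placeholders, and the real config likely carries many more groups needed by _prepare_for_llama_export.

from omegaconf import OmegaConf

# Hypothetical config covering only the keys EagerLlamaRunner.__init__ touches here.
config = OmegaConf.create(
    {
        "model": {
            "params": "params.json",          # placeholder path
            "tokenizer_path": "tokenizer.model",
        },
        "sequence": {"max_seq_length": 2048},
        "kv_cache": {"use_kv_cache": True},
    }
)

# EagerLlamaRunner(config) would then resolve, for example:
print(config.model.tokenizer_path)      # "tokenizer.model"
print(config.sequence.max_seq_length)   # 2048
print(config.kv_cache.use_kv_cache)     # True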

examples/models/llava/export_llava.py

Lines changed: 0 additions & 1 deletion
@@ -93,7 +93,6 @@ def forward(self, input_pos, embeddings):
         use_kv_cache=True,
         example_inputs=(torch.tensor([0], dtype=torch.int64), embeddings),
         dynamic_shapes=dynamic_shapes,
-        args=llava.text_model_args,
     )

     dtype_override = DType.fp32

extension/llm/export/builder.py

Lines changed: 8 additions & 7 deletions
@@ -21,6 +21,7 @@
     DuplicateDynamicQuantChainPass,
 )
 from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass
+
 from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower
 from executorch.exir.backend.partitioner import Partitioner

@@ -33,9 +34,9 @@
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass

 from executorch.extension.export_util.utils import export_to_edge, save_pte_program
-
 from executorch.extension.llm.export.export_passes import RemoveRedundantTransposes
 from executorch.extension.llm.tokenizer.utils import get_tokenizer
+from omegaconf import DictConfig
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torch.ao.quantization.quantizer import Quantizer
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer

@@ -87,7 +88,7 @@ def __init__(
         use_kv_cache,
         example_inputs,
         example_kwarg_inputs: Optional[Dict] = None,
-        args: Optional[Any] = None,
+        config: Optional[DictConfig] = None,
         enable_dynamic_shape: bool = False,
         generate_full_logits: bool = False,
         calibration_tasks: Optional[List[str]] = None,

@@ -121,7 +122,7 @@ def __init__(
         self.output_dir = "."
         self.dynamic_shapes = dynamic_shapes
         self._saved_pte_filename = None
-        self.args = args
+        self.config = config
         self.calibration_tasks = calibration_tasks
         self.calibration_limit = calibration_limit
         self.calibration_seq_length = calibration_seq_length

@@ -203,7 +204,7 @@ def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            if self.args.backend.qnn.enabled:
+            if self.config.backend.qnn.enabled:
                 # TODO: this is temporary, as qnn flow does not work with new, non-functional export IR.
                 # See issue: https://github.com/pytorch/executorch/issues/7373

@@ -249,8 +250,8 @@ def export(self) -> "LLMEdgeManager":
         # Persisting those changes back to an ExportedProgram will require
         # an additional export().
         self.pre_autograd_graph_module = exported_module.module()
-        if self.args.export.export_only:
-            torch.export.save(exported_module, self.args.export.output_name)
+        if self.config.export.export_only:
+            torch.export.save(exported_module, self.config.export.output_name)
         return self

     def run_canonical_optimizations(self):

@@ -414,7 +415,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
             self.export()

         override_export_behaviour = contextlib.nullcontext()
-        if self.args.backend.qnn.enabled:
+        if self.config.backend.qnn.enabled:
             override_export_behaviour = patch.object(
                 torch._utils_internal,
                 "export_training_ir_rollout_check",
