From b65dd1060e7c35b6177d2a349671130c907bb2a8 Mon Sep 17 00:00:00 2001
From: Chen Lai
Date: Thu, 17 Apr 2025 12:32:21 -0700
Subject: [PATCH] Instruct users running llama for qnn to use the right recipe (#10231)

Summary:
Many users try to export llama with the flow in
https://github.com/pytorch/executorch/tree/main/examples/models/llama and end
up with a non-performant model or other issues, such as
https://github.com/pytorch/executorch/issues/10226. Instruct those users to
use the Qualcomm (qcom) version instead.

Reviewed By: kirklandsign

Differential Revision: D73125467
---
 examples/models/llama/export_llama_lib.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index 64cbc9e23af..2553f82139a 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -816,6 +816,10 @@ def _to_edge_and_lower_llama(  # noqa: C901
         modelname = f"coreml_{modelname}"
 
     if args.qnn:
+        logging.warning(
+            "The model definition in the current recipe is not performant; please refer to the instructions"
+            " in https://github.com/pytorch/executorch/tree/main/examples/qualcomm/oss_scripts/llama/README.md for better performance."
+        )
         from executorch.extension.llm.custom_ops import model_sharding
 
         partitioners.append(
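
For context, below is a minimal, self-contained sketch of the behavior this patch adds. It is not the actual export_llama_lib.py: the args object here is a hypothetical stand-in for the parsed CLI arguments, and only the qnn flag is modeled.

import logging
from types import SimpleNamespace

# Hypothetical stand-in for the argparse namespace used by export_llama_lib.py;
# only the `qnn` flag matters for this sketch.
args = SimpleNamespace(qnn=True)

if args.qnn:
    # Mirrors the guard added by the patch: the generic llama export flow is not
    # tuned for Qualcomm hardware, so warn and point users to the QNN-specific
    # README before continuing with lowering/partitioning.
    logging.warning(
        "The model definition in the current recipe is not performant; please refer to the"
        " instructions in"
        " https://github.com/pytorch/executorch/tree/main/examples/qualcomm/oss_scripts/llama/README.md"
        " for better performance."
    )

Note that the warning is deliberately non-fatal: the existing QNN export path keeps working, while users are steered toward the Qualcomm-specific recipe.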