Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8581,18 +8581,18 @@ def test_attention_sink(self):
msg["wiki_ppl"], self.llm_specs[model_name].wikitext_ppl
)

def test_qwen2_5(self):
# This is not testing static llm flow.
def test_hf_causal_lm(self):
# This is the Hugging Face transformers flow, not the static llm flow.
if not self.required_envs([]):
self.skipTest("missing required envs")
prompt = "My favourite condiment is "
cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/qwen2_5/qwen2_5.py",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/hf_causal_lm.py",
"--prompt",
prompt,
"--decoder_model",
"qwen2.5_0.5B",
"qwen2_5-0_5b",
"--ptq",
"16a8w",
"--enable_spinquant_r3",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
logging.basicConfig(level=logging.INFO, format=FORMAT)
logging.getLogger().setLevel(logging.INFO)

PTE_FILENAME = "qwen_qnn_q16"
PTE_FILENAME = "hf_causal_lm_qnn"


def compile(args: argparse.Namespace, qnn_config: QnnConfig): # noqa: C901
Expand Down Expand Up @@ -195,7 +195,7 @@ def main(args):
"-a",
"--artifact",
help="path for storing generated artifacts by this example.",
default="qwen2_5",
default="hf_causal_lm",
type=str,
)

Expand All @@ -216,8 +216,8 @@ def main(args):

parser.add_argument(
"--decoder_model",
choices=["qwen2.5_0.5B", "qwen2.5_0.5B_instruct", "qwen2.5_1.5B_instruct"],
help="The Qwen model to export. Current available options are: [qwen2.5_0.5B, qwen2.5_0.5B_instruct, qwen2.5_1.5B_instruct]",
choices=list(HUGGING_FACE_REPO_IDS.keys()),
help=f"The Hugging Face decoder model to export. Available options are: {list(HUGGING_FACE_REPO_IDS.keys())}",
required=True,
)

Expand Down
2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/llm_utils/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Tutorial to run [eval_decoder_model_qnn.py](./eval_decoder_model_qnn.py)
This script, [`eval_decoder_model_qnn.py`](./eval_decoder_model_qnn.py), evaluates large language models (LLMs) from Hugging Face Transformers that have been compiled into the ExecuTorch Portable Executable (PTE) format for execution on Qualcomm devices. It uses the `lm-evaluation-harness` library to run various NLP evaluation tasks.

> ⚠️ **Important:** Note that this script runs PTE files generated specifically for Hugging Face Transformers, such as [qwen2_5.py](../qwen2_5/qwen2_5.py), rather than [the static LLaMA version](../llama/llama.py).
> ⚠️ **Important:** This script runs PTE files generated from Hugging Face Transformers models by scripts such as [hf_causal_lm.py](../hf_causal_lm.py), not PTE files produced by [the static LLaMA version](../llama/llama.py).

### Features:

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
Expand Down Expand Up @@ -44,9 +44,13 @@
logging.basicConfig(level=logging.INFO, format=FORMAT)

# Maps a short decoder-model name to its Hugging Face repository ID
# ("<owner>/<repo>"). The keys of this dict are used as the accepted values
# of the `--decoder_model` command-line option, so renaming or removing a key
# changes what that option accepts.
HUGGING_FACE_REPO_IDS = {
"qwen2.5_0.5B": "Qwen/Qwen2.5-0.5B",
"qwen2.5_1.5B_instruct": "Qwen/Qwen2.5-1.5B-Instruct",
"qwen2.5_0.5B_instruct": "Qwen/Qwen2.5-0.5B-Instruct",
"llama3_2-1b": "NousResearch/Llama-3.2-1B",
"qwen2_5-0_5b": "Qwen/Qwen2.5-0.5B",
"qwen2_5-1_5b_instruct": "Qwen/Qwen2.5-1.5B-Instruct",
"qwen2_5-0_5b_instruct": "Qwen/Qwen2.5-0.5B-Instruct",
"qwen3-0_6b": "Qwen/Qwen3-0.6B",
"smollm2_135m": "HuggingFaceTB/SmolLM2-135M",
# NOTE(review): this repo name suggests a reward model ("RM-") rather than a
# plain causal LM checkpoint; confirm it is the intended Gemma source.
"gemma-2b" : "weqweasdas/RM-Gemma-2B",
}


Expand All @@ -64,6 +68,7 @@
config.ar_len = 1 # kv mode
config.max_batch_size = batch_size
config.enable_spinquant_r3 = enable_spinquant_r3
config.use_cache = True

# Some configs provide a head_dim that differs from the equation below (e.g., qwen3)
if not hasattr(config, "head_dim"):
Expand Down
Loading