[tuner] add padding_conv attribute alongside IGEMM support for conv #2357

# Copyright 2025 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: CI - Llama 8B Perplexity Tests

on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - main
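
# Triggers: manual dispatch, every pull request (presubmit), and every push
# to main (postsubmit).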
concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  test_perplexity:
    name: "Llama 8B Perplexity tests"
    strategy:
      matrix:
        model: [
          {
            irpa: "/amdshark-dev/ossci-models/llama_3_1/instruct_8b_fp16.irpa",
            torch_dataset: "amdsharktank/tests/evaluate/datasets/llama_8b_fp16_torch.json",
            iree_dataset: "amdsharktank/tests/evaluate/datasets/llama_8b_fp16_iree.json"
          },
          {
            irpa: "/amdshark-dev/ossci-models/llama_3_1/instruct_8b_fp8_e4m3fnuz.irpa",
            torch_dataset: "amdsharktank/tests/evaluate/datasets/llama_8b_fp8_e4m3_fnuz_torch.json",
            iree_dataset: "amdsharktank/tests/evaluate/datasets/llama_8b_fp8_e4m3_fnuz_iree.json"
          }
        ]
        python-version: [3.11]
        torch-version: ["2.6.0"]
        runs-on: [linux-mi325-1gpu-ossci-nod-ai]
      fail-fast: false
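    # The matrix expands to one job per model entry (fp16 and fp8 e4m3fnuz),
    # each pinned to the same Python, torch, and runner values; with fail-fast
    # disabled, the two runs complete and report independently.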
    runs-on: ${{matrix.runs-on}}
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{matrix.python-version}}
      - name: Create Python venv
        run: python -m venv ${VENV_DIR}
      - name: Install amdsharktank deps
        run: |
          source ${VENV_DIR}/bin/activate
          amdsharktank/build_tools/install_test_dependencies.sh \
            --torch-version ${{matrix.torch-version}} \
            --pytorch-rocm
          pip freeze
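      # Rough local-reproduction sketch of the setup above (assumes a repo
      # checkout on a ROCm-capable host with a suitable Python available):
      #   python -m venv .venv && source .venv/bin/activate
      #   amdsharktank/build_tools/install_test_dependencies.sh \
      #     --torch-version 2.6.0 --pytorch-rocm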
      # TODO: remove this test and leave only chunked prefill once it is stabilized.
      - name: Run Torch Model Eval
        continue-on-error: true # xfail: expected to fail
        run: |
          source ${VENV_DIR}/bin/activate
          export IRPA=${{ matrix.model.irpa }}
          export TOKENIZER=/amdshark-dev/ossci-models/llama_3_1/tokenizer
          export DATASET=${{ matrix.model.torch_dataset }}
          python3 -m amdsharktank.tools.eval_llm_model \
            --irpa=${IRPA} \
            --tokenizer=${TOKENIZER} \
            --dataset=${DATASET} \
            --expected-err=1e-2 \
            --min-context=10
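      # Note: continue-on-error keeps a failing eval step from failing the
      # job, which is how the xfail steps above and below are tolerated while
      # they stabilize.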
      - name: Run Torch Model Eval with Chunked Prefill
        continue-on-error: true # xfail: expected to fail
        run: |
          source ${VENV_DIR}/bin/activate
          export IRPA=${{ matrix.model.irpa }}
          export TOKENIZER=/amdshark-dev/ossci-models/llama_3_1/tokenizer
          export DATASET=${{ matrix.model.torch_dataset }}
          # See https://github.com/nod-ai/amd-shark-ai/issues/2494 for why we
          # choose a larger expected error: some ops are tiled differently
          # than in the baseline.
          python3 -m amdsharktank.tools.eval_llm_model \
            --irpa=${IRPA} \
            --tokenizer=${TOKENIZER} \
            --dataset=${DATASET} \
            --expected-err=4e-2 \
            --chunk-block-size=1 \
            --min-context=10
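      # The IREE eval below (eval_llm_vmfb) targets the HIP backend with
      # gfx942, the architecture of MI300-series (CDNA3) GPUs such as the
      # MI325X runner selected in the matrix.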
      - name: Run IREE Model Eval
        continue-on-error: true # xfail: expected to fail
        run: |
          source ${VENV_DIR}/bin/activate
          export IRPA=${{ matrix.model.irpa }}
          export TOKENIZER=/amdshark-dev/ossci-models/llama_3_1/tokenizer
          export DATASET=${{ matrix.model.iree_dataset }}
          python3 -m amdsharktank.tools.eval_llm_vmfb \
            --irpa=${IRPA} \
            --tokenizer=${TOKENIZER} \
            --dataset=${DATASET} \
            --expected-err=5e-2 \
            --min-context=10 \
            --iree-hal-target-device=hip \
            --iree-hip-target=gfx942 \