CI - E2E Llama 3.1 405B FP4 Test #455
# Copyright 2025 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: CI - E2E Llama 3.1 405B FP4 Test

on:
  workflow_dispatch:
  schedule:
    # Run every 6 hours (at minute 30: 00:30, 06:30, 12:30, 18:30 UTC), every day.
    - cron: "30 */6 * * *"

permissions:
  contents: write

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  test_llama_large:
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 240
    name: "Release: Llama 405B FP4 Benchmarking Tests"
    strategy:
      matrix:
        version: [3.11]
      fail-fast: false
    runs-on: linux-mi355-1gpu-ossci-nod-ai
    container:
      image: 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26'
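      # The options below expose the host AMD GPU to the container: /dev/kfd is the
      # ROCm compute (KFD) interface, /dev/dri holds the DRI render nodes, and
      # --group-add video grants the container user membership of the group that
      # typically owns those device nodes.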
      options: --ipc host
        --group-add video
        --device /dev/kfd
        --device /dev/dri
        --env-file /etc/podinfo/gha-gpu-isolation-settings
      volumes:
        - /amdshark-dev:/amdshark-dev
        - /amdshark-cache:/amdshark-cache
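      # /amdshark-dev holds the pre-exported model artifacts referenced by the env
      # block below; /amdshark-cache is assumed to be a cache shared across runs.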
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      IRPA: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/fp4_preshuffled_2025_09_12.irpa"
      TOKENIZER: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/tokenizer.json"
      TOKENIZER_CONFIG: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/tokenizer_config.json"
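      # The IRPA path above is the packed FP4 model parameters (an IREE parameter
      # archive); the TOKENIZER* paths are the matching tokenizer files.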
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{ matrix.version }}
      - name: Create Python venv
        run: |
          python -m venv ${VENV_DIR}
          source ${VENV_DIR}/bin/activate
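          # Note: each `run` step starts a fresh shell, so this activation only applies
          # within this step; later steps are assumed to re-activate the venv (e.g. via
          # scripts/setenv.sh) or rely on the interpreter from setup-python.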
      - name: Install pip deps
        run: |
          # Install TheRock
          sudo apt-get update
          sudo apt install -y gfortran build-essential binutils
          python -m pip install \
            --index-url https://d2awnip2yjpvqn.cloudfront.net/v2/gfx950-dcgpu/ \
            rocm[libraries,devel]

          # Set environment variables
          export ROCM_PATH=$(python -m rocm_sdk path --root)
          export LD_LIBRARY_PATH="$ROCM_PATH/lib":$LD_LIBRARY_PATH
          echo "ROCM_PATH=$ROCM_PATH" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV

          # Install amdshark deps
          bash scripts/setenv.sh --source
          hf auth login --token ${{ secrets.HF_FLUX_TOKEN }}
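      # Sanity-check the runner before the long-running tests: rocm-smi reports GPU
      # status, rocminfo lists the HSA agents ROCm can see, and
      # `iree-run-module --list_devices` confirms the IREE runtime can enumerate the GPU.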
      - name: Check runner health
        run: |
          echo "ROCM_PATH=$ROCM_PATH"
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          rocm-smi
          rocminfo
          iree-run-module --list_devices
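      # Each E2E step uses `if: always()` so it runs even if an earlier step failed;
      # output is teed into a per-model directory under output_artifacts/ that the
      # upload step collects.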
      - name: E2E Test 405b without topk
        id: llama_405b_fp4_without_topk_test
        if: always()
        run: |
          mkdir -p output_artifacts/output_llama-405b-fp4-without-topk
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-without-topk --gpu-model MI350X 2>&1 | tee output_artifacts/output_llama-405b-fp4-without-topk/e2e_llama-405b-fp4-without-topk.log
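      # Only the benchmark and online_serving stages are run for the top-k variant,
      # selected via --stage.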
      - name: E2E Test 405b with topk (skips vmfb validation, which does not yet support the topk option)
        id: llama_405b_fp4_with_topk_test
        if: always()
        run: |
          mkdir -p output_artifacts/output_llama-405b-fp4-with-topk
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-with-topk --stage benchmark --gpu-model MI350X 2>&1 | tee output_artifacts/output_llama-405b-fp4-with-topk/e2e_llama-405b-fp4-with-topk.log
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-with-topk --stage online_serving --gpu-model MI350X 2>&1 | tee -a output_artifacts/output_llama-405b-fp4-with-topk/e2e_llama-405b-fp4-with-topk.log
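      # The perplexity eval below reuses the vmfb and config produced under the
      # without-topk output directory; continue-on-error keeps an eval regression from
      # failing the whole job.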
      - name: Run IREE Model Eval (Perplexity)
        if: ${{ steps.llama_405b_fp4_without_topk_test.outcome == 'success' }}
        continue-on-error: true
        run: |
          export DATASET="amdsharktank/tests/evaluate/datasets/llama_405b_fp8_e4m3fn_iree.json"
          export TOKENIZER="/amdshark-dev/llama3.1/405b/fp4/"
          export IRPA="$IRPA"
          python3 -m amdsharktank.tools.eval_llm_vmfb \
            --irpa=${IRPA} \
            --tokenizer=${TOKENIZER} \
            --dataset=${DATASET} \
            --expected-err=1e-2 \
            --min-context=10 \
            --iree-hal-target-device=hip \
            --iree-hip-target=gfx950 \
            --vmfb output_artifacts/output_llama-405b-fp4-without-topk/output.vmfb \
            --config output_artifacts/output_llama-405b-fp4-without-topk/config_attn.json 2>&1 | tee output_artifacts/output_llama-405b-fp4-without-topk/eval_llm_vmfb_perplexity.log
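      # Logs and the consolidated benchmark JSON are uploaded as a single "llama-logs"
      # artifact so the push_logs job below can download and publish them.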
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: llama-logs
          path: |
            output_artifacts/output_*/consolidated_benchmark.json
            output_artifacts/output_*/*.log
            output_artifacts/version.txt
      - name: Cleanup output artifacts
        if: always()
        run: |
          rm -rf output_artifacts
          test ! -d output_artifacts && echo "Output artifacts are removed"

  # New job to push logs to amd-shark-ai-reports repository
  push_logs:
    name: "Push log llama 405B FP4"
    needs: [ test_llama_large ]
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Download log artifacts
        uses: actions/download-artifact@v4
        with:
          name: llama-logs
          path: logs
      - name: Checkout Target Repo
        if: always()
        uses: actions/checkout@v4
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
          path: amd-shark-ai-reports
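      # Copy the downloaded logs into a UTC-date-stamped directory per model variant
      # and commit them to the reports repo; `|| true` keeps a missing variant from
      # aborting the push.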
      - name: Push artifacts
        if: always()
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email ""
          date=$(date -u +'%Y-%m-%d-%H'h)
          mkdir -p "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk"
          mkdir -p "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk"
          cp logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk"
          cp logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk"
          cp -r logs/output_llama-405b-fp4-with-topk/* "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk" || true
          cp -r logs/output_llama-405b-fp4-without-topk/* "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk" || true
          cd amd-shark-ai-reports
          git pull
          git add $date
          git commit -m "Add CI Llama 405B FP4 logs on $(date -u +'%Y-%m-%d-%H'h)"
          git push origin main
          rm -rf ../logs