Release Llama 3.1 8B FP8 Benchmarking Tests from IREE TOM #352
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: Release Llama 3.1 8B FP8 Benchmarking Tests from IREE TOM

on:
  workflow_dispatch:
  schedule:
    # Runs every 6 hours, every day.
    - cron: "0 */6 * * *"
permissions:
  contents: write

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
  test_llama_large:
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
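    # The condition above means scheduled (cron) runs fire only in the upstream
    # nod-ai repository; manual workflow_dispatch runs are allowed from anywhere.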
    timeout-minutes: 240
    name: "Release: Llama 8B FP8 Benchmarking Tests from IREE TOM"
    strategy:
      matrix:
        version: ["3.11"]
      fail-fast: false
    runs-on: linux-mi325-1gpu-ossci-nod-ai
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      OFFLINE_SERVING: DISABLED
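    # OFFLINE_SERVING gates the three "Run offline serving" steps below;
    # set it to ENABLED to run them on their scheduled weekdays.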
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
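      # Record the UTC date as a step output and flag day-of-week 2 (Tuesday,
      # UTC) as the serving day; IS_SERVING_DAY is exported via GITHUB_ENV for
      # any later steps or scripts that consume it.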
      - name: Get current date and check if it is a serving day
        id: date
        run: |
          echo "date=$(date -u +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
          if [ "$(date -u +%u)" = "2" ]; then
            echo "IS_SERVING_DAY=true" >> "$GITHUB_ENV"
          else
            echo "IS_SERVING_DAY=false" >> "$GITHUB_ENV"
          fi
| - name: "Setting up Python" | |
| id: setup_python | |
| uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 | |
| with: | |
| python-version: ${{matrix.version}} | |
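      # NOTE: each `run:` step gets a fresh shell, so the activation below only
      # lasts for this step; later steps must re-activate the venv (or the
      # helper scripts must use $VENV_DIR directly).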
      - name: Create Python venv
        run: |
          python -m venv "$VENV_DIR"
          source "$VENV_DIR/bin/activate"
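      # setenv.sh --tom installs the pip dependencies and builds IREE from
      # tip-of-main ("TOM"); version.txt records the installed iree/amdshark
      # packages and the exact IREE commit so each report is traceable to its
      # toolchain.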
      - name: Install pip deps and build IREE
        run: |
          bash scripts/setenv.sh --tom
          mkdir -p output_artifacts
          pip freeze | grep -E 'iree|amdshark' > "$(pwd)/output_artifacts/version.txt"
          cd iree
          echo -n "IREE " >> "$(pwd)/../output_artifacts/version.txt"
          git log -1 --pretty=%H >> "$(pwd)/../output_artifacts/version.txt"
          cd -
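      # Export the FP8 Llama 3.1 8B parameters (.irpa) and compile them to a
      # VMFB with the freshly built IREE tools, at prefill/decode batch size 4.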
      - name: Run export and compile
        id: export_compile
        run: |
          export PATH="$PWD/iree-build/tools:$PATH"
          export PYTHONPATH="$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python"
          bash scripts/export_and_compile.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --dtype fp8 --bs-prefill 4 --bs-decode 4 2>&1 | tee "$(pwd)/output_artifacts/export_and_compilation.log"
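      # Sanity-check the numerics of the compiled VMFB (--steps 64) before
      # benchmarking; continue-on-error keeps the rest of the pipeline running
      # even if validation fails.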
      - name: Validate VMFB Responses
        id: validate_vmfb
        if: ${{ steps.export_compile.outcome == 'success' }}
        continue-on-error: true
        run: |
          echo "Validate Responses"
          export PATH="$PWD/iree-build/tools:$PATH"
          export PYTHONPATH="$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python"
          bash scripts/validate_numerics.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --vmfb "$(pwd)/output_artifacts/output.vmfb" \
            --config "$(pwd)/output_artifacts/config_attn.json" \
            --tokenizer /amdshark-dev/8b/instruct/tokenizer.json \
            --tokenizer_config /amdshark-dev/8b/instruct/tokenizer_config.json \
            --steps 64 \
            --kv-cache-dtype float8_e4m3fnuz | tee output_artifacts/run_llm_vmfb.log
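      # Benchmark the compiled module, then merge the per-benchmark JSON files
      # into a single consolidated_benchmark.json for reporting.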
      - name: Run IREE Benchmark Module
        if: ${{ steps.export_compile.outcome == 'success' }}
        run: |
          export PATH="$PWD/iree-build/tools:$PATH"
          export PYTHONPATH="$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python"
          bash scripts/run_iree_benchmark.sh --bs-prefill 4 --bs-decode 4 \
            --parameters /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --model llama-8B-FP8 2>&1 | tee "$(pwd)/output_artifacts/iree_benchmark.log"
          python scripts/utils.py \
            --combine-json "$(pwd)/output_artifacts/benchmark_module" \
            --output-json "$(pwd)/output_artifacts/consolidated_benchmark.json" \
            --append-isl
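      # Online serving: serve the model with shortfin on port 8900, pinned to
      # GPU 0 via ROCR_VISIBLE_DEVICES, and capture the serving log.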
      - name: Run online serving
        if: ${{ steps.export_compile.outcome == 'success' }}
        run: |
          export PATH="$PWD/iree-build/tools:$PATH"
          export PYTHONPATH="$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python"
          cd shortfin
          export ROCR_VISIBLE_DEVICES=0
          bash ../scripts/run_serving.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
            --vmfb ../output_artifacts/output.vmfb \
            --model_config ../output_artifacts/config_attn.json \
            --port 8900 | tee ../output_artifacts/serving.log
          cd ..
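      # The three offline-serving steps below only run when OFFLINE_SERVING is
      # ENABLED, and each mode is pinned to a weekday: chat on Monday (1),
      # reasoning on Wednesday (3), summary on Friday (5). Note the weekday
      # checks use local time (plain `date`), unlike the UTC check above.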
      - name: "Run offline serving: chat"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          if [ "$(date +%u)" -eq 1 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode chat
            cd ..
            python scripts/utils.py \
              --combine-json "$(pwd)/output_artifacts/chat" \
              --output-json "$(pwd)/output_artifacts/consolidated_chat_serving.json"
          fi
      - name: "Run offline serving: reasoning"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          if [ "$(date +%u)" -eq 3 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode reasoning
            cd ..
            python scripts/utils.py \
              --combine-json "$(pwd)/output_artifacts/reasoning" \
              --output-json "$(pwd)/output_artifacts/consolidated_reasoning_serving.json"
          fi
      - name: "Run offline serving: summary"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          if [ "$(date +%u)" -eq 5 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode summary
            cd ..
            python scripts/utils.py \
              --combine-json "$(pwd)/output_artifacts/summary" \
              --output-json "$(pwd)/output_artifacts/consolidated_summary_serving.json"
          fi
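      # Always upload the consolidated JSON reports, logs, and version
      # manifest, even when earlier steps failed, then scrub the workspace.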
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: llama-logs
          path: |
            output_artifacts/consolidated*.json
            output_artifacts/*.log
            output_artifacts/version.txt
      - name: Cleanup output artifacts
        if: always()
        run: |
          rm -rf output_artifacts
          test ! -d output_artifacts && echo "Output artifacts are removed"
  # Follow-up job: push the collected logs to the amd-shark-ai-reports repository.
  push_logs:
    name: "Push llama 8B FP8 logs"
    needs: [test_llama_large]
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Download log artifacts
        uses: actions/download-artifact@v4
        with:
          name: llama-logs
          path: logs
      - name: Checkout target repo
        if: always()
        uses: actions/checkout@v4
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
          path: amd-shark-ai-reports
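      # Commit the logs under a UTC date-and-hour stamped directory; the
      # `|| true` guards keep the step alive when a given file type was not
      # produced by the upstream job.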
      - name: Push artifacts
        if: always()
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          date=$(date -u +'%Y-%m-%d-%H'h)
          mkdir -p "amd-shark-ai-reports/$date/llama_3.1-8b-fp8"
          cp -v logs/*.json "amd-shark-ai-reports/$date/llama_3.1-8b-fp8" || true
          cp -v logs/*.log "amd-shark-ai-reports/$date/llama_3.1-8b-fp8" || true
          cp -v logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-8b-fp8"
          cd amd-shark-ai-reports
          git pull
          git add "$date"
          git commit -m "Add CI Llama 8B FP8 logs on $(date -u +'%Y-%m-%d-%H'h)"
          git push origin main
          rm -rf ../logs