# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: Release Llama 3.1 8B FP8 Benchmarking Tests from IREE TOM

on:
  workflow_dispatch:
  schedule:
    # Run every 6 hours, every day.
    - cron: "0 */6 * * *"

permissions:
  contents: write

concurrency:
  # This workflow only triggers on schedule and workflow_dispatch, so
  # `github.event.number` is never set and the group key resolves to the
  # commit SHA. The workflow name is prepended to avoid conflicts between
  # different workflows running on the same commit.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
  test_llama_large:
    # Only run the scheduled job in the upstream repo; any repo can still
    # trigger manually via workflow_dispatch.
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 240
    name: "Release: Llama 8B FP8 Benchmarking Tests from IREE TOM"
    strategy:
      matrix:
        # Quoted so YAML does not parse the version as the float 3.11.
        version: ["3.11"]
      fail-fast: false
    runs-on: linux-mi325-1gpu-ossci-nod-ai
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      # Gates the "Run Offline serving" steps below; flip to ENABLED to run them.
      OFFLINE_SERVING: DISABLED
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Get Current Date And Check if it is serving day
        id: date
        run: |
          echo "date=$(date -u +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
          # ISO weekday 2 == Tuesday.
          # NOTE(review): IS_SERVING_DAY is exported but not read by any later
          # step in this workflow — confirm whether it is still needed.
          if [ "$(date -u +%u)" = "2" ]; then
            echo "IS_SERVING_DAY=true" >> "$GITHUB_ENV"
          else
            echo "IS_SERVING_DAY=false" >> "$GITHUB_ENV"
          fi
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{ matrix.version }}
      - name: Create Python venv
        run: |
          python -m venv "${VENV_DIR}"
          source "${VENV_DIR}/bin/activate"
      - name: Install pip deps and build IREE
        run: |
          bash scripts/setenv.sh --tom
          mkdir -p output_artifacts
          # Record tool versions alongside the benchmark artifacts.
          pip freeze | grep -E 'iree|amdshark' > "$(pwd)/output_artifacts/version.txt"
          cd iree
          echo -n "IREE " >> "$(pwd)/../output_artifacts/version.txt"
          git log -1 --pretty=%H >> "$(pwd)/../output_artifacts/version.txt"
          cd -
      - name: Run export and compile
        id: export_compile
        run: |
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          bash scripts/export_and_compile.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --dtype fp8 --bs-prefill 4 --bs-decode 4 2>&1 | tee "$(pwd)/output_artifacts/export_and_compilation.log"
      - name: Validate VMFB Responses
        id: validate_vmfb
        if: ${{ steps.export_compile.outcome == 'success' }}
        # Numerics validation is informational; do not fail the job on it.
        continue-on-error: true
        run: |
          echo "Validate Responses"
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          bash scripts/validate_numerics.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --vmfb $(pwd)/output_artifacts/output.vmfb \
            --config $(pwd)/output_artifacts/config_attn.json \
            --tokenizer /amdshark-dev/8b/instruct/tokenizer.json \
            --tokenizer_config /amdshark-dev/8b/instruct/tokenizer_config.json \
            --steps 64 \
            --kv-cache-dtype float8_e4m3fnuz | tee output_artifacts/run_llm_vmfb.log
      - name: Run IREE Benchmark Module
        if: ${{ steps.export_compile.outcome == 'success' }}
        run: |
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          bash scripts/run_iree_benchmark.sh --bs-prefill 4 --bs-decode 4 \
            --parameters /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --model llama-8B-FP8 2>&1 | tee "$(pwd)/output_artifacts/iree_benchmark.log"
          python scripts/utils.py \
            --combine-json $(pwd)/output_artifacts/benchmark_module \
            --output-json $(pwd)/output_artifacts/consolidated_benchmark.json \
            --append-isl
      - name: Run online serving
        if: ${{ steps.export_compile.outcome == 'success' }}
        run: |
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          cd shortfin
          export ROCR_VISIBLE_DEVICES=0
          bash ../scripts/run_serving.sh \
            --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
            --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
            --vmfb ../output_artifacts/output.vmfb \
            --model_config ../output_artifacts/config_attn.json \
            --port 8900 | tee ../output_artifacts/serving.log
          cd ..
      - name: "Run Offline serving :chat"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          # Chat mode only runs on Mondays (ISO weekday 1).
          if [ "$(date +%u)" -eq 1 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode chat
            cd ..
            python scripts/utils.py \
              --combine-json $(pwd)/output_artifacts/chat \
              --output-json $(pwd)/output_artifacts/consolidated_chat_serving.json
          fi
      - name: "Run Offline serving :reasoning"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          # Reasoning mode only runs on Wednesdays (ISO weekday 3).
          if [ "$(date +%u)" -eq 3 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode reasoning
            cd ..
            python scripts/utils.py \
              --combine-json $(pwd)/output_artifacts/reasoning \
              --output-json $(pwd)/output_artifacts/consolidated_reasoning_serving.json
          fi
      - name: "Run Offline serving :summary"
        if: ${{ env.OFFLINE_SERVING == 'ENABLED' }}
        run: |
          # Summary mode only runs on Fridays (ISO weekday 5).
          if [ "$(date +%u)" -eq 5 ]; then
            cd shortfin
            export ROCR_VISIBLE_DEVICES=0
            bash ../scripts/run_offline_serving.sh \
              --irpa /amdshark-dev/8b/fp8/attnf8/native_fp8_e4m3fnuz_llama3_8b.irpa \
              --tokenizer_json /amdshark-dev/8b/instruct/tokenizer.json \
              --vmfb ../output_artifacts/output.vmfb \
              --model_config ../output_artifacts/config_attn.json \
              --mode summary
            cd ..
            python scripts/utils.py \
              --combine-json $(pwd)/output_artifacts/summary \
              --output-json $(pwd)/output_artifacts/consolidated_summary_serving.json
          fi
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: llama-logs
          path: |
            output_artifacts/consolidated*.json
            output_artifacts/*.log
            output_artifacts/version.txt
      - name: Cleanup output artifacts
        if: always()
        run: |
          rm -rf output_artifacts
          test ! -d output_artifacts && echo "Output artifacts are removed"
# New job to push logs to amd-shark-ai-reports repository
push_logs:
name: "Push log llama 8B FP8"
needs: [ test_llama_large ]
if: always()
runs-on: ubuntu-24.04
steps:
- name: Download log artifacts
uses: actions/download-artifact@v4
with:
name: llama-logs
path: logs
- name: Checkout Target Repo
if: always()
uses: actions/checkout@v4
with:
repository: nod-ai/amd-shark-ai-reports
token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
path: amd-shark-ai-reports
- name: Push artifacts
if: always()
run: |
git config --global user.name "GitHub Actions Bot"
git config --global user.email ""
date=$(date -u +'%Y-%m-%d-%H'h)
mkdir -p "amd-shark-ai-reports/$date/llama_3.1-8b-fp8"
cp -v logs/*.json "amd-shark-ai-reports/$date/llama_3.1-8b-fp8" || true
cp -v logs/*.log "amd-shark-ai-reports/$date/llama_3.1-8b-fp8" || true
cp -v logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-8b-fp8"
cd amd-shark-ai-reports
git pull
git add $date
git commit -m "Add CI Llama 8B FP8 logs on $(date -u +'%Y-%m-%d-%H'h)"
git push origin main
rm -rf ../logs