# Copyright 2025 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: CI - E2E Llama 3.1 405B FP4 Test
on:
  workflow_dispatch:
  schedule:
    # Run every 6 hours at minute 30 (00:30, 06:30, 12:30, 18:30 UTC) on all days.
    - cron: "30 */6 * * *"
permissions:
  contents: write
concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
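# The benchmark job runs on a single-GPU MI355 runner inside a container image that
# ships without ROCm; the ROCm SDK (TheRock wheels for gfx950) is installed at job
# time in the "Install pip deps" step below.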
jobs:
  test_llama_large:
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 240
    name: "Release: Llama 405B FP4 Benchmarking Tests"
    strategy:
      matrix:
        version: [3.11]
      fail-fast: false
    runs-on: linux-mi355-1gpu-ossci-nod-ai
    container:
      image: 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26'
      options: --ipc host
        --group-add video
        --device /dev/kfd
        --device /dev/dri
        --env-file /etc/podinfo/gha-gpu-isolation-settings
      volumes:
        - /amdshark-dev:/amdshark-dev
        - /amdshark-cache:/amdshark-cache
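    # The bind-mounted /amdshark-dev volume provides the model files referenced by the
    # IRPA / TOKENIZER env vars below; /amdshark-cache is presumably a shared cache dir.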
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      IRPA: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/fp4_preshuffled_2025_09_12.irpa"
      TOKENIZER: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/tokenizer.json"
      TOKENIZER_CONFIG: "/amdshark-dev/ossci-models/llama_3_1/405b/fp4/tokenizer_config.json"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: ${{ matrix.version }}
      - name: Create Python venv
        run: |
          python -m venv ${VENV_DIR}
          source ${VENV_DIR}/bin/activate
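      # Note: each step runs in a fresh shell, so activating the venv here does not
      # persist; later steps must re-activate ${VENV_DIR} or invoke tools directly.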
      - name: Install pip deps
        run: |
          # Install build prerequisites and the ROCm SDK (TheRock wheels for gfx950)
          sudo apt-get update
          sudo apt-get install -y gfortran build-essential binutils
          python -m pip install \
            --index-url https://d2awnip2yjpvqn.cloudfront.net/v2/gfx950-dcgpu/ \
            "rocm[libraries,devel]"
          # Export ROCm paths and persist them for later steps via GITHUB_ENV
          export ROCM_PATH=$(python -m rocm_sdk path --root)
          export LD_LIBRARY_PATH="$ROCM_PATH/lib":$LD_LIBRARY_PATH
          echo "ROCM_PATH=$ROCM_PATH" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
          # Install amdshark deps
          bash scripts/setenv.sh --source
          hf auth login --token ${{ secrets.HF_FLUX_TOKEN }}
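      # Sanity-check the ROCm installation and GPU visibility before launching the
      # long-running E2E tests.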
      - name: Check runner health
        run: |
          echo "ROCM_PATH=$ROCM_PATH"
          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          rocm-smi
          rocminfo
          iree-run-module --list_devices
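      # The next two steps run the E2E suite with and without the topk sampling path.
      # Both are marked `if: always()` so the second configuration still runs if the
      # first one fails.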
      - name: E2E Test 405b without topk
        id: llama_405b_fp4_without_topk_test
        if: always()
        run: |
          mkdir -p output_artifacts/output_llama-405b-fp4-without-topk
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-without-topk --gpu-model MI350X 2>&1 | tee output_artifacts/output_llama-405b-fp4-without-topk/e2e_llama-405b-fp4-without-topk.log
      - name: E2E Test 405b with topk (skips vmfb validation, which currently does not support the topk option)
        id: llama_405b_fp4_with_topk_test
        if: always()
        run: |
          mkdir -p output_artifacts/output_llama-405b-fp4-with-topk
          export PATH=$PWD/iree-build/tools/:$PATH
          export PYTHONPATH=$PWD/iree-build/compiler/bindings/python:$PWD/iree-build/runtime/bindings/python
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-with-topk --stage benchmark --gpu-model MI350X 2>&1 | tee output_artifacts/output_llama-405b-fp4-with-topk/e2e_llama-405b-fp4-with-topk.log
          python3 -m amdsharktank.tools.e2e_model_test --model llama-405b-fp4-with-topk --stage online_serving --gpu-model MI350X 2>&1 | tee -a output_artifacts/output_llama-405b-fp4-with-topk/e2e_llama-405b-fp4-with-topk.log
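      # The perplexity eval below reuses the vmfb and attention config produced by the
      # without-topk E2E step, so it only runs when that step succeeded.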
      - name: Run IREE Model Eval (Perplexity)
        if: ${{ steps.llama_405b_fp4_without_topk_test.outcome == 'success' }}
        continue-on-error: true
        run: |
          export DATASET="amdsharktank/tests/evaluate/datasets/llama_405b_fp8_e4m3fn_iree.json"
          export TOKENIZER="/amdshark-dev/llama3.1/405b/fp4/"
          export IRPA="$IRPA"
          python3 -m amdsharktank.tools.eval_llm_vmfb \
            --irpa=${IRPA} \
            --tokenizer=${TOKENIZER} \
            --dataset=${DATASET} \
            --expected-err=1e-2 \
            --min-context=10 \
            --iree-hal-target-device=hip \
            --iree-hip-target=gfx950 \
            --vmfb output_artifacts/output_llama-405b-fp4-without-topk/output.vmfb \
            --config output_artifacts/output_llama-405b-fp4-without-topk/config_attn.json 2>&1 | tee output_artifacts/output_llama-405b-fp4-without-topk/eval_llm_vmfb_perplexity.log
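      # Upload the logs and consolidated benchmark JSON so the push_logs job can
      # publish them to the amd-shark-ai-reports repository.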
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: llama-logs
          path: |
            output_artifacts/output_*/consolidated_benchmark.json
            output_artifacts/output_*/*.log
            output_artifacts/version.txt
      - name: Cleanup output artifacts
        if: always()
        run: |
          rm -rf output_artifacts
          test ! -d output_artifacts && echo "Output artifacts are removed"
  # Push the collected logs to the amd-shark-ai-reports repository.
  push_logs:
    name: "Push Llama 405B FP4 logs"
    needs: [ test_llama_large ]
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Download log artifacts
        uses: actions/download-artifact@v4
        with:
          name: llama-logs
          path: logs
      - name: Checkout Target Repo
        if: always()
        uses: actions/checkout@v4
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.SHARK_AI_REPORTS_GITHUB_TOKEN }}
          path: amd-shark-ai-reports
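      # Copy the downloaded logs into date-stamped, per-configuration directories and
      # commit them to the reports repository.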
      - name: Push artifacts
        if: always()
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email ""
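          # Date-stamped directory name in UTC, e.g. 2025-01-02-06h.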
          date=$(date -u +'%Y-%m-%d-%H'h)
          mkdir -p "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk"
          mkdir -p "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk"
          cp logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk"
          cp logs/version.txt "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk"
          cp -r logs/output_llama-405b-fp4-with-topk/* "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-with-topk" || true
          cp -r logs/output_llama-405b-fp4-without-topk/* "amd-shark-ai-reports/$date/llama_3.1-405b-fp4-without-topk" || true
          cd amd-shark-ai-reports
          git pull
          git add "$date"
          git commit -m "Add CI Llama 405B FP4 logs on $date"
          git push origin main
          rm -rf ../logs