.github/workflows/t3000-model-perf-tests-impl.yaml

name: "[internal] T3000 model perf tests impl"

on:
  workflow_call:

jobs:

  t3000-model-perf-tests:
    strategy:
      fail-fast: false
      matrix:
        test-group: [
          { name: "t3k LLM falcon7b model perf tests", model: "falcon7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: U05RWH3QUPM}, # Salar Hosseini
          { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
          { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich
          { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
          { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
          #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
        ]
    name: ${{ matrix.test-group.name }}
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"]
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Enable performance mode
        run: |
          sudo cpupower frequency-set -g performance
      - uses: ./.github/actions/ensure-active-weka-mount
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_${{ matrix.test-group.arch }}
      - name: Extract files
        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run model perf regression tests
        shell: bash {0}
        timeout-minutes: ${{ matrix.test-group.timeout }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd $TT_METAL_HOME
          export PYTHONPATH=$TT_METAL_HOME
          source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_model_perf_tests.sh
          ${{ matrix.test-group.cmd }}
          env python models/perf/merge_perf_results.py
      - name: Check perf report exists
        id: check-perf-report
        if: ${{ !cancelled() }}
        run: |
          ls -hal
          export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv"
          ls -hal $PERF_REPORT_FILENAME
          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
      - name: Upload perf report
        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal
          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
      - uses: ./.github/actions/slack-report
        if: ${{ failure() }}
        with:
          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
          owner: ${{ matrix.test-group.owner_id }}
      - name: Disable performance mode
        if: always()
        run: |
          sudo cpupower frequency-set -g ondemand