From 59f31463c1b07d4236cdbe46ae2591b89fd7424c Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Tue, 4 Mar 2025 10:32:29 +0000 Subject: [PATCH 1/8] tryout shared pool --- .github/workflows/build-and-run-all-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index f435f5d4..952a454d 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -36,6 +36,7 @@ jobs: test-group: [ {arch: wormhole_b0, card: n150, timeout: 5}, {arch: wormhole_b0, card: n300, timeout: 15}, + {arch: wormhole_b0, card: tt-beta-ubuntu-2204-n300-large-stable, timeout: 15}, {arch: blackhole, card: p150, timeout: 15}, ] ubuntu-version: [ From b49fdac7d0b5b84a07cc4878906e729d6ac07cb4 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Tue, 4 Mar 2025 10:39:06 +0000 Subject: [PATCH 2/8] removed self-hosted --- .github/workflows/run-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 500f1414..e2382611 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -56,7 +56,6 @@ jobs: name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ inputs.ubuntu-version }} runs-on: - - self-hosted - ${{ inputs.card }} container: image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest From 1b511e7ebc1f273e24f58aad194b096120507993 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Tue, 4 Mar 2025 10:46:11 +0000 Subject: [PATCH 3/8] update label --- .github/workflows/build-and-run-all-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index 952a454d..9c6d08f2 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -36,7 +36,7 @@ jobs: test-group: [ {arch: wormhole_b0, card: n150, timeout: 5}, {arch: wormhole_b0, card: n300, timeout: 15}, - {arch: wormhole_b0, card: tt-beta-ubuntu-2204-n300-large-stable, timeout: 15}, + {arch: wormhole_b0, card: tt-ubuntu-2204-n300-large-stable, timeout: 15}, {arch: blackhole, card: p150, timeout: 15}, ] ubuntu-version: [ From 71cf48bca4f657120fbdacefa79429bcb2b9c678 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Tue, 4 Mar 2025 15:02:45 +0000 Subject: [PATCH 4/8] try to reset card --- .github/workflows/run-tests.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index e2382611..65f0a6cd 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -90,6 +90,11 @@ jobs: shell: bash run: tar xvf artifact.tar + - name: Reset the card and read the card info + run: | + tt-smi -r 0 + tt-smi -s -f smi.log && cat smi.log + - name: Run arch-specific UMD unit tests run: | ${{ env.TEST_OUTPUT_DIR }}/umd/${{ inputs.arch }}/unit_tests From 1629ce00984d7478fa0176207bd69b08d5efb14a Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Tue, 4 Mar 2025 17:54:02 +0000 Subject: [PATCH 5/8] another try --- .github/workflows/run-tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 65f0a6cd..569d8395 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -91,9 +91,10 @@ jobs: run: tar xvf artifact.tar - name: Reset the card and read the card info + if: ${{ inputs.card == 'tt-ubuntu-2204-n300-large-stable' }} run: | - tt-smi -r 0 - tt-smi -s -f smi.log && cat smi.log + /home/software/syseng/wh/tt-smi -r 0 + /home/software/syseng/wh/tt-smi -s -f smi.log && cat smi.log - name: Run arch-specific UMD unit tests run: | From c18a39030cfb135aaaf2f7611073a54410b80329 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Fri, 7 Mar 2025 09:18:33 +0000 Subject: [PATCH 6/8] minor name change --- .github/workflows/build-and-run-all-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index 44044e37..3e54d6cc 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -36,7 +36,7 @@ jobs: test-group: [ {arch: wormhole_b0, card: n150, timeout: 5}, {arch: wormhole_b0, card: n300, timeout: 15}, - {arch: wormhole_b0, card: tt-ubuntu-2204-n300-large-stable, timeout: 15}, + {arch: wormhole_b0, card: tt-beta-ubuntu-2204-n300-large-stable, timeout: 25}, {arch: blackhole, card: p150, timeout: 15}, ] ubuntu-version: [ From 77955a1124acfff374b6551e0f7b599cb96dfc90 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Fri, 7 Mar 2025 10:25:47 +0000 Subject: [PATCH 7/8] minor fix --- .github/workflows/run-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 569d8395..6fee42b1 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -91,7 +91,7 @@ jobs: run: tar xvf artifact.tar - name: Reset the card and read the card info - if: ${{ inputs.card == 'tt-ubuntu-2204-n300-large-stable' }} + if: ${{ inputs.card == 'tt-beta-ubuntu-2204-n300-large-stable' }} run: | /home/software/syseng/wh/tt-smi -r 0 /home/software/syseng/wh/tt-smi -s -f smi.log && cat smi.log From 8f61b4fcde81a685dd07b93a28be12c9c1ae203b Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Fri, 7 Mar 2025 10:35:17 +0000 Subject: [PATCH 8/8] another try to reset --- .github/workflows/run-tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 6fee42b1..7edcfdc2 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -93,8 +93,7 @@ jobs: - name: Reset the card and read the card info if: ${{ inputs.card == 'tt-beta-ubuntu-2204-n300-large-stable' }} run: | - /home/software/syseng/wh/tt-smi -r 0 - /home/software/syseng/wh/tt-smi -s -f smi.log && cat smi.log + /opt/tt_metal_infra/scripts/ci/wormhole_b0/reset.sh - name: Run arch-specific UMD unit tests run: |