From 7853f479a05ad823d6eabb1ab33889dedc5e2ed1 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 08:54:08 -0700 Subject: [PATCH 01/37] reorganizing inductor test for triton --- .github/workflows/inductor.yml | 100 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 44 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 174cd02..ead24f2 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -2,6 +2,7 @@ name: Test build/test linux gpu on: pull_request: + branches: [ main, "*"] #will remove this once we see everything is working fine workflow_dispatch: inputs: triton_pin: @@ -15,6 +16,7 @@ on: jobs: build-test: + continue-on-error: true uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: runner: linux.g5.48xlarge.nvidia.gpu @@ -22,47 +24,57 @@ jobs: gpu-arch-version: "12.1" timeout: 360 # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 - script: | - set -x - pushd .. - echo "Installing triton" - git clone https://github.com/triton-lang/triton.git - pushd triton - echo "Checking out triton branch or commit" - git checkout ${{ github.event.inputs.triton_pin || 'main' }} - sudo yum install -y zlib-devel - echo "Installing build-time dependencies" - pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 - export llvm_hash=$(cat cmake/llvm-hash.txt) - echo "llvm_hash: $llvm_hash" - pushd .. - echo "Cloning llvm-project" - git clone https://github.com/llvm/llvm-project.git - pushd llvm-project - echo "Checking out llvm hash" - git checkout "$llvm_hash" - mkdir build - pushd build - echo "Building llvm" - cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" - ninja - export LLVM_BUILD_DIR=$(pwd) - popd - popd - popd - LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python - echo "Installing triton python package" - popd - echo "Cloning pytorch" - git clone https://github.com/pytorch/pytorch.git - pushd pytorch - echo "Checking out pytorch branch or commit" - git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} - git submodule sync - git submodule update --init --recursive - pip install -r requirements.txt - pip install mkl-static mkl-include pytest pytest-xdist - echo "Installing magma-cuda121" - conda install -y -c pytorch magma-cuda121 - python setup.py install - pytest -n 1 test/inductor/test_torchinductor.py \ No newline at end of file + steps: + - name: "Checkout" + run: | + set -x + pushd .. + echo "Installing triton" + git clone https://github.com/triton-lang/triton.git + pushd triton + echo "Checking out triton branch or commit" + git checkout ${{ github.event.inputs.triton_pin || 'main' }} + export llvm_hash=$(cat cmake/llvm-hash.txt) + echo "llvm_hash: $llvm_hash" + pushd .. + echo "Cloning llvm-project" + git clone https://github.com/llvm/llvm-project.git + pushd llvm-project + echo "Checking out llvm hash" + git checkout "$llvm_hash" + mkdir build + pushd build + echo "Building llvm" + - name: "installs" + run: | + sudo yum install -y zlib-devel + echo "Installing build-time dependencies" + pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" + ninja + export LLVM_BUILD_DIR=$(pwd) + popd + popd + popd + LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python + echo "Installing triton python package" + popd + - name: "pytorch download" + run: | + echo "Cloning pytorch" + git clone https://github.com/pytorch/pytorch.git + pushd pytorch + echo "Checking out pytorch branch or commit" + git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} + git submodule sync + git submodule update --init --recursive + - name: "post pytorch installs" + run: | + pip install -r requirements.txt + pip install mkl-static mkl-include pytest pytest-xdist + echo "Installing magma-cuda121" + conda install -y -c pytorch magma-cuda121 + python setup.py install + - name: "tests" + run: | + pytest -n 1 test/inductor/test_torchinductor.py From f935a318583e475b5543a804080432d7456241ff Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:01:16 -0700 Subject: [PATCH 02/37] fixing missed indentation --- .github/workflows/inductor.yml | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index ead24f2..00a7ab6 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -27,24 +27,24 @@ jobs: steps: - name: "Checkout" run: | - set -x - pushd .. - echo "Installing triton" - git clone https://github.com/triton-lang/triton.git - pushd triton - echo "Checking out triton branch or commit" - git checkout ${{ github.event.inputs.triton_pin || 'main' }} - export llvm_hash=$(cat cmake/llvm-hash.txt) - echo "llvm_hash: $llvm_hash" - pushd .. - echo "Cloning llvm-project" - git clone https://github.com/llvm/llvm-project.git - pushd llvm-project - echo "Checking out llvm hash" - git checkout "$llvm_hash" - mkdir build - pushd build - echo "Building llvm" + set -x + pushd .. + echo "Installing triton" + git clone https://github.com/triton-lang/triton.git + pushd triton + echo "Checking out triton branch or commit" + git checkout ${{ github.event.inputs.triton_pin || 'main' }} + export llvm_hash=$(cat cmake/llvm-hash.txt) + echo "llvm_hash: $llvm_hash" + pushd .. + echo "Cloning llvm-project" + git clone https://github.com/llvm/llvm-project.git + pushd llvm-project + echo "Checking out llvm hash" + git checkout "$llvm_hash" + mkdir build + pushd build + echo "Building llvm" - name: "installs" run: | sudo yum install -y zlib-devel From c328c14458c8e28c26de6fdfdc823f9f5e4f645c Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:04:58 -0700 Subject: [PATCH 03/37] uses wasnt working correctly --- .github/workflows/inductor.yml | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 00a7ab6..140db25 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,64 +17,64 @@ on: jobs: build-test: continue-on-error: true - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + rruns-on: linux.g5.48xlarge.nvidia.gpu + timeout-minutes: 30 with: - runner: linux.g5.48xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: "12.1" - timeout: 360 # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 - steps: - - name: "Checkout" - run: | - set -x - pushd .. - echo "Installing triton" - git clone https://github.com/triton-lang/triton.git - pushd triton - echo "Checking out triton branch or commit" - git checkout ${{ github.event.inputs.triton_pin || 'main' }} - export llvm_hash=$(cat cmake/llvm-hash.txt) - echo "llvm_hash: $llvm_hash" - pushd .. - echo "Cloning llvm-project" - git clone https://github.com/llvm/llvm-project.git - pushd llvm-project - echo "Checking out llvm hash" - git checkout "$llvm_hash" - mkdir build - pushd build - echo "Building llvm" - - name: "installs" - run: | - sudo yum install -y zlib-devel - echo "Installing build-time dependencies" - pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 - cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" - ninja - export LLVM_BUILD_DIR=$(pwd) - popd - popd - popd - LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python - echo "Installing triton python package" - popd - - name: "pytorch download" - run: | - echo "Cloning pytorch" - git clone https://github.com/pytorch/pytorch.git - pushd pytorch - echo "Checking out pytorch branch or commit" - git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} - git submodule sync - git submodule update --init --recursive - - name: "post pytorch installs" - run: | - pip install -r requirements.txt - pip install mkl-static mkl-include pytest pytest-xdist - echo "Installing magma-cuda121" - conda install -y -c pytorch magma-cuda121 - python setup.py install - - name: "tests" - run: | - pytest -n 1 test/inductor/test_torchinductor.py + steps: + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + - name: "Checkout" + run: | + set -x + pushd .. + echo "Installing triton" + git clone https://github.com/triton-lang/triton.git + pushd triton + echo "Checking out triton branch or commit" + git checkout ${{ github.event.inputs.triton_pin || 'main' }} + export llvm_hash=$(cat cmake/llvm-hash.txt) + echo "llvm_hash: $llvm_hash" + pushd .. + echo "Cloning llvm-project" + git clone https://github.com/llvm/llvm-project.git + pushd llvm-project + echo "Checking out llvm hash" + git checkout "$llvm_hash" + mkdir build + pushd build + echo "Building llvm" + - name: "installs" + run: | + sudo yum install -y zlib-devel + echo "Installing build-time dependencies" + pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" + ninja + export LLVM_BUILD_DIR=$(pwd) + popd + popd + popd + LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python + echo "Installing triton python package" + popd + - name: "pytorch download" + run: | + echo "Cloning pytorch" + git clone https://github.com/pytorch/pytorch.git + pushd pytorch + echo "Checking out pytorch branch or commit" + git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} + git submodule sync + git submodule update --init --recursive + - name: "post pytorch installs" + run: | + pip install -r requirements.txt + pip install mkl-static mkl-include pytest pytest-xdist + echo "Installing magma-cuda121" + conda install -y -c pytorch magma-cuda121 + python setup.py install + - name: "tests" + run: | + pytest -n 1 test/inductor/test_torchinductor.py From 06a6e240113e5f9777e3dd25771398147dad42fe Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:11:06 -0700 Subject: [PATCH 04/37] removing inux_job.yml --- .github/workflows/inductor.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 140db25..171e5c6 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -19,12 +19,7 @@ jobs: continue-on-error: true rruns-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 - with: - gpu-arch-type: cuda - gpu-arch-version: "12.1" - # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 steps: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - name: "Checkout" run: | set -x @@ -76,5 +71,9 @@ jobs: conda install -y -c pytorch magma-cuda121 python setup.py install - name: "tests" + with: + gpu-arch-type: cuda + gpu-arch-version: "12.1" + # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py From 3ff6e6c6cf280fcd19f602d29f30acd3c4be3378 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:11:39 -0700 Subject: [PATCH 05/37] typo --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 171e5c6..faacac6 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,7 +17,7 @@ on: jobs: build-test: continue-on-error: true - rruns-on: linux.g5.48xlarge.nvidia.gpu + runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: - name: "Checkout" From 8e0ebfedc833e9f71036782c1d994e1ad92ce56a Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:15:38 -0700 Subject: [PATCH 06/37] using linux job again --- .github/workflows/inductor.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index faacac6..ac0eb9d 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -71,9 +71,10 @@ jobs: conda install -y -c pytorch magma-cuda121 python setup.py install - name: "tests" + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: gpu-arch-type: cuda gpu-arch-version: "12.1" # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 - run: | - pytest -n 1 test/inductor/test_torchinductor.py + script: | + pytest -n 1 test/inductor/test_torchinductor.py From 59a689b2262508eb9bdb47da9a92e36812e7cc2f Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:16:53 -0700 Subject: [PATCH 07/37] fixing silly naming convention --- .github/workflows/inductor.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index ac0eb9d..9c4e36d 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -70,8 +70,7 @@ jobs: echo "Installing magma-cuda121" conda install -y -c pytorch magma-cuda121 python setup.py install - - name: "tests" - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: gpu-arch-type: cuda gpu-arch-version: "12.1" From 9ffe51ec94e21ae286ec6574cd9ca7d5f3e15848 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:19:55 -0700 Subject: [PATCH 08/37] removing reusable workflow --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 9c4e36d..90d0196 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -70,7 +70,7 @@ jobs: echo "Installing magma-cuda121" conda install -y -c pytorch magma-cuda121 python setup.py install - - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + - name: test with: gpu-arch-type: cuda gpu-arch-version: "12.1" From d04a8c55f9fa07f9f4a4294796dacc397473b098 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:20:41 -0700 Subject: [PATCH 09/37] didnt move run back out --- .github/workflows/inductor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 90d0196..d3d064e 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -75,5 +75,5 @@ jobs: gpu-arch-type: cuda gpu-arch-version: "12.1" # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 - script: | - pytest -n 1 test/inductor/test_torchinductor.py + run: | + pytest -n 1 test/inductor/test_torchinductor.py From bbf45bb4a575194667c50b46232383f1cf55779d Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:21:46 -0700 Subject: [PATCH 10/37] env instead of with --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index d3d064e..b023340 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -71,7 +71,7 @@ jobs: conda install -y -c pytorch magma-cuda121 python setup.py install - name: test - with: + env: gpu-arch-type: cuda gpu-arch-version: "12.1" # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 From 03cb180f3e6c6b84c7918daa928f303dbab4aa09 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:28:05 -0700 Subject: [PATCH 11/37] moving over to a conda install instead of pip --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index b023340..c636320 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -44,7 +44,7 @@ jobs: run: | sudo yum install -y zlib-devel echo "Installing build-time dependencies" - pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + conda install -y ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" ninja export LLVM_BUILD_DIR=$(pwd) From 249927208f4e297cc39971c229f45832440b4c98 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:31:27 -0700 Subject: [PATCH 12/37] removing triton after finished' --- .github/workflows/inductor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index c636320..6204a4b 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -25,6 +25,9 @@ jobs: set -x pushd .. echo "Installing triton" + if [ -d triton ]; then + rm -r triton + fi git clone https://github.com/triton-lang/triton.git pushd triton echo "Checking out triton branch or commit" From d6f6ab144148cc29ff4797cc4cc5a9c5af3c027e Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:35:36 -0700 Subject: [PATCH 13/37] adding cleanup step --- .github/workflows/inductor.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 6204a4b..4e4569e 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -25,9 +25,6 @@ jobs: set -x pushd .. echo "Installing triton" - if [ -d triton ]; then - rm -r triton - fi git clone https://github.com/triton-lang/triton.git pushd triton echo "Checking out triton branch or commit" @@ -80,3 +77,19 @@ jobs: # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py + clean-test: + needs: build-test + runs-on: linux.g5.48xlarge.nvidia.gpu + timeout-minutes: 30 + steps: + - name: "cleanup" + - run: | + if [ -d triton ]; then + rm -r triton + fi + if [ -d pytorch ]; then + rm -r pytorch + fi + if [ -d llvm-project ]; then + rm -r llvm-project + fi From 053ef2a3ec59415c6748aeeb29612c421bf9a0b0 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:37:01 -0700 Subject: [PATCH 14/37] an extra - added --- .github/workflows/inductor.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4e4569e..4db17f5 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -77,13 +77,14 @@ jobs: # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py + clean-test: needs: build-test runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: - name: "cleanup" - - run: | + run: | if [ -d triton ]; then rm -r triton fi From 7a07954880a61f1bf984c15a974864c17baf1ed7 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:38:51 -0700 Subject: [PATCH 15/37] adding echos mostly to re-run --- .github/workflows/inductor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4db17f5..95bf1e6 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -86,11 +86,14 @@ jobs: - name: "cleanup" run: | if [ -d triton ]; then + echo "triton removed" rm -r triton fi if [ -d pytorch ]; then + echo "pytorch removed" rm -r pytorch fi if [ -d llvm-project ]; then + echo "llvm-project removed" rm -r llvm-project fi From 75a26d9466547a435864796729151c5554b0faeb Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:41:11 -0700 Subject: [PATCH 16/37] why didnt this work --- .github/workflows/inductor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 95bf1e6..e0ecefd 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -97,3 +97,4 @@ jobs: echo "llvm-project removed" rm -r llvm-project fi + ls -l From b1067dc832b8e1f016ceb47f21681a15b600a6e6 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:43:19 -0700 Subject: [PATCH 17/37] pwd instead of ls --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index e0ecefd..35e8a0b 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -97,4 +97,4 @@ jobs: echo "llvm-project removed" rm -r llvm-project fi - ls -l + pwd From dc2b4352dc7b325fee1c4b3a0ec20b65f978d70f Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 09:57:34 -0700 Subject: [PATCH 18/37] fixing cleanup step --- .github/workflows/inductor.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 35e8a0b..73f9b27 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -88,13 +88,20 @@ jobs: if [ -d triton ]; then echo "triton removed" rm -r triton + else + echo "triton not removed" fi if [ -d pytorch ]; then echo "pytorch removed" rm -r pytorch + else + echo "pytorch not removed" fi if [ -d llvm-project ]; then echo "llvm-project removed" rm -r llvm-project + else + echo "llvm not removed" fi pwd + ls From 11e8804dab9fc11803a14e300dd342c5d788a610 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:18:36 -0700 Subject: [PATCH 19/37] adding cleaning function --- .github/workflows/inductor.yml | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 73f9b27..90184f7 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -77,31 +77,5 @@ jobs: # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py - - clean-test: - needs: build-test - runs-on: linux.g5.48xlarge.nvidia.gpu - timeout-minutes: 30 - steps: - - name: "cleanup" - run: | - if [ -d triton ]; then - echo "triton removed" - rm -r triton - else - echo "triton not removed" - fi - if [ -d pytorch ]; then - echo "pytorch removed" - rm -r pytorch - else - echo "pytorch not removed" - fi - if [ -d llvm-project ]; then - echo "llvm-project removed" - rm -r llvm-project - else - echo "llvm not removed" - fi - pwd - ls + - uses: colpal/actions-clean@v1 + if: always() From e92bdbdc8ab1ddf18204117a145b116479c90497 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:23:11 -0700 Subject: [PATCH 20/37] trying a different clean action --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 90184f7..7da9ff6 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -77,5 +77,5 @@ jobs: # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py - - uses: colpal/actions-clean@v1 + - uses: TooMuch4U/runner-post-cleanup@v2.1 if: always() From 5e58fb77b047ef447e1ca58db9449dd7e4cc71c5 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:25:38 -0700 Subject: [PATCH 21/37] trying step instead of new workflow --- .github/workflows/inductor.yml | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 7da9ff6..8f924c0 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -77,5 +77,29 @@ jobs: # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 run: | pytest -n 1 test/inductor/test_torchinductor.py - - uses: TooMuch4U/runner-post-cleanup@v2.1 + - name: "cleanup" if: always() + run: | + pwd + ls + + if [ -d triton ]; then + echo "triton removed" + rm -r triton + else + echo "triton not removed" + fi + if [ -d pytorch ]; then + echo "pytorch removed" + rm -r pytorch + else + echo "pytorch not removed" + fi + if [ -d llvm-project ]; then + echo "llvm-project removed" + rm -r llvm-project + else + echo "llvm not removed" + fi + pwd + ls From eaef6d31c1dd1180390713568003b0c3cca4eafc Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:26:16 -0700 Subject: [PATCH 22/37] really should format these things before pushing --- .github/workflows/inductor.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 8f924c0..62a1966 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -80,9 +80,8 @@ jobs: - name: "cleanup" if: always() run: | - pwd - ls - + pwd + ls if [ -d triton ]; then echo "triton removed" rm -r triton From 4a4d02141bedc4dc532cf498e84856b5f4f61f99 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:28:08 -0700 Subject: [PATCH 23/37] trying a different directory --- .github/workflows/inductor.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 62a1966..9e2e536 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -80,6 +80,8 @@ jobs: - name: "cleanup" if: always() run: | + pwd + pushd .. pwd ls if [ -d triton ]; then From 128b42de2edbf92de989c4a7e499268c30f38ed1 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:35:19 -0700 Subject: [PATCH 24/37] if I start with the pytorch linux job does this work --- .github/workflows/inductor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 9e2e536..764f28d 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,6 +17,7 @@ on: jobs: build-test: continue-on-error: true + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: From 0818a09b8b172d1454cf7e084e696b2e933bffb9 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:36:32 -0700 Subject: [PATCH 25/37] is this the right way? --- .github/workflows/inductor.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 764f28d..33ba292 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,10 +17,11 @@ on: jobs: build-test: continue-on-error: true - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: + - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - name: "Checkout" run: | set -x From f20f6f1eb820921695f650692fe0df76ca724c00 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:36:35 -0700 Subject: [PATCH 26/37] is this the right way? --- .github/workflows/inductor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 33ba292..2166c76 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -22,6 +22,9 @@ jobs: timeout-minutes: 30 steps: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + gpu-arch-type: cuda + gpu-arch-version: "12.1" - name: "Checkout" run: | set -x From cdfc624d7144d8320252901b3c5d47b8e9ff09bc Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:40:01 -0700 Subject: [PATCH 27/37] moving usees --- .github/workflows/inductor.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 2166c76..8bfa933 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -15,16 +15,18 @@ on: required: false jobs: + small-setup: + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.g5.48xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "12.1" build-test: continue-on-error: true - + needs: small-setup runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: - - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - gpu-arch-type: cuda - gpu-arch-version: "12.1" - name: "Checkout" run: | set -x From 62b1ee34d328872117d687cd95af51a3355aa11a Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:53:47 -0700 Subject: [PATCH 28/37] testing --- .github/workflows/inductor.yml | 46 +++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 8bfa933..2eeb763 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -15,22 +15,52 @@ on: required: false jobs: - small-setup: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.g5.48xlarge.nvidia.gpu - gpu-arch-type: cuda - gpu-arch-version: "12.1" build-test: continue-on-error: true needs: small-setup runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: - - name: "Checkout" + - name: "conda setup" run: | set -x pushd .. + base_dir = "$pwd" + echo $base_dir + # conda installation + setting up dependencies + # Miniconda3 script expects to be run from a filename ending in .sh + mc3_dir="${base_dir}/miniconda3" + mc3_setup_script=$(mktemp -t mc3XXXXX.sh) + wget -q -O${mc3_setup_script} \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + chmod u+x ${mc3_setup_script} + + mc3_setup_opts="-b -p ${mc3_dir}" + # Miniconda setup does not handle existing installations well. + # Backup existing installation before a clean new install. + if [ -d "$mc3_dir" ]; then + bkup_mc3_dir=$(mktemp -u -p $(dirname "$mc3_dir") miniconda3.bkup.XXXX) + echo "Backing up existing Miniconda3 installation to: ${bkup_mc3_dir}" + mv $mc3_dir $bkup_mc3_dir + fi + + sh ${mc3_setup_script} ${mc3_setup_opts} + ${mc3_dir}/bin/conda config --set default_python 3.11 + export PATH=${mc3_dir}/bin:$PATH + + # Pass shell name to conda init + ${mc3_dir}/bin/conda init $(basename ${SHELL}) + ${mc3_dir}/bin/conda config --set solver classic + ${mc3_dir}/bin/conda install -y ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + sudo dnf groupinstall -y "Development Tools" + sudo yum install -y zlib-devel + # zlib-devel to resolve libz.so cannot be found error + # https://fburl.com/workplace/o8gp4ock + sudo dnf install zlib-devel libzstd-devel + ${mc3_dir}/bin/conda config --set solver libmamba + pushd $base_dir + - name: "Checkout" + run: | echo "Installing triton" git clone https://github.com/triton-lang/triton.git pushd triton @@ -49,9 +79,7 @@ jobs: echo "Building llvm" - name: "installs" run: | - sudo yum install -y zlib-devel echo "Installing build-time dependencies" - conda install -y ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" ninja export LLVM_BUILD_DIR=$(pwd) From 7b5890d502d823ce75a5fea40e76b50e2db97852 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 10:59:11 -0700 Subject: [PATCH 29/37] removing needless need --- .github/workflows/inductor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 2eeb763..d287ab8 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,7 +17,6 @@ on: jobs: build-test: continue-on-error: true - needs: small-setup runs-on: linux.g5.48xlarge.nvidia.gpu timeout-minutes: 30 steps: From ed0722b8ed038e80a26d2fee006f99331fb8d4e9 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:00:55 -0700 Subject: [PATCH 30/37] base dir --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index d287ab8..3e51684 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -24,7 +24,7 @@ jobs: run: | set -x pushd .. - base_dir = "$pwd" + base_dir = "pwd" echo $base_dir # conda installation + setting up dependencies # Miniconda3 script expects to be run from a filename ending in .sh From 2ad6aa5ec528eb69f8e9e653c52e7a96781dacc3 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:02:46 -0700 Subject: [PATCH 31/37] exports --- .github/workflows/inductor.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 3e51684..7b55e81 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -24,17 +24,17 @@ jobs: run: | set -x pushd .. - base_dir = "pwd" + export base_dir=$(pwd) echo $base_dir # conda installation + setting up dependencies # Miniconda3 script expects to be run from a filename ending in .sh - mc3_dir="${base_dir}/miniconda3" - mc3_setup_script=$(mktemp -t mc3XXXXX.sh) + export mc3_dir="${base_dir}/miniconda3" + export mc3_setup_script=$(mktemp -t mc3XXXXX.sh) wget -q -O${mc3_setup_script} \ https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh chmod u+x ${mc3_setup_script} - mc3_setup_opts="-b -p ${mc3_dir}" + export mc3_setup_opts="-b -p ${mc3_dir}" # Miniconda setup does not handle existing installations well. # Backup existing installation before a clean new install. if [ -d "$mc3_dir" ]; then From 364a132f46cbdc84db792269a8630e3ec966a713 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:04:16 -0700 Subject: [PATCH 32/37] here --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 7b55e81..4505d30 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -50,7 +50,7 @@ jobs: # Pass shell name to conda init ${mc3_dir}/bin/conda init $(basename ${SHELL}) ${mc3_dir}/bin/conda config --set solver classic - ${mc3_dir}/bin/conda install -y ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + ${mc3_dir}/bin/conda install -y ninja cmake wheel sudo dnf groupinstall -y "Development Tools" sudo yum install -y zlib-devel # zlib-devel to resolve libz.so cannot be found error From a1a73c1cf08cad1476c22ca735d9a76609c08faf Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:16:42 -0700 Subject: [PATCH 33/37] removing llvm custom --- .github/workflows/inductor.yml | 157 ++++++++------------------------- 1 file changed, 36 insertions(+), 121 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4505d30..d1b5f35 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -17,124 +17,39 @@ on: jobs: build-test: continue-on-error: true - runs-on: linux.g5.48xlarge.nvidia.gpu - timeout-minutes: 30 - steps: - - name: "conda setup" - run: | - set -x - pushd .. - export base_dir=$(pwd) - echo $base_dir - # conda installation + setting up dependencies - # Miniconda3 script expects to be run from a filename ending in .sh - export mc3_dir="${base_dir}/miniconda3" - export mc3_setup_script=$(mktemp -t mc3XXXXX.sh) - wget -q -O${mc3_setup_script} \ - https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - chmod u+x ${mc3_setup_script} - - export mc3_setup_opts="-b -p ${mc3_dir}" - # Miniconda setup does not handle existing installations well. - # Backup existing installation before a clean new install. - if [ -d "$mc3_dir" ]; then - bkup_mc3_dir=$(mktemp -u -p $(dirname "$mc3_dir") miniconda3.bkup.XXXX) - echo "Backing up existing Miniconda3 installation to: ${bkup_mc3_dir}" - mv $mc3_dir $bkup_mc3_dir - fi - - sh ${mc3_setup_script} ${mc3_setup_opts} - ${mc3_dir}/bin/conda config --set default_python 3.11 - export PATH=${mc3_dir}/bin:$PATH - - # Pass shell name to conda init - ${mc3_dir}/bin/conda init $(basename ${SHELL}) - ${mc3_dir}/bin/conda config --set solver classic - ${mc3_dir}/bin/conda install -y ninja cmake wheel - sudo dnf groupinstall -y "Development Tools" - sudo yum install -y zlib-devel - # zlib-devel to resolve libz.so cannot be found error - # https://fburl.com/workplace/o8gp4ock - sudo dnf install zlib-devel libzstd-devel - ${mc3_dir}/bin/conda config --set solver libmamba - pushd $base_dir - - name: "Checkout" - run: | - echo "Installing triton" - git clone https://github.com/triton-lang/triton.git - pushd triton - echo "Checking out triton branch or commit" - git checkout ${{ github.event.inputs.triton_pin || 'main' }} - export llvm_hash=$(cat cmake/llvm-hash.txt) - echo "llvm_hash: $llvm_hash" - pushd .. - echo "Cloning llvm-project" - git clone https://github.com/llvm/llvm-project.git - pushd llvm-project - echo "Checking out llvm hash" - git checkout "$llvm_hash" - mkdir build - pushd build - echo "Building llvm" - - name: "installs" - run: | - echo "Installing build-time dependencies" - cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" - ninja - export LLVM_BUILD_DIR=$(pwd) - popd - popd - popd - LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python - echo "Installing triton python package" - popd - - name: "pytorch download" - run: | - echo "Cloning pytorch" - git clone https://github.com/pytorch/pytorch.git - pushd pytorch - echo "Checking out pytorch branch or commit" - git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} - git submodule sync - git submodule update --init --recursive - - name: "post pytorch installs" - run: | - pip install -r requirements.txt - pip install mkl-static mkl-include pytest pytest-xdist - echo "Installing magma-cuda121" - conda install -y -c pytorch magma-cuda121 - python setup.py install - - name: test - env: - gpu-arch-type: cuda - gpu-arch-version: "12.1" - # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 - run: | - pytest -n 1 test/inductor/test_torchinductor.py - - name: "cleanup" - if: always() - run: | - pwd - pushd .. - pwd - ls - if [ -d triton ]; then - echo "triton removed" - rm -r triton - else - echo "triton not removed" - fi - if [ -d pytorch ]; then - echo "pytorch removed" - rm -r pytorch - else - echo "pytorch not removed" - fi - if [ -d llvm-project ]; then - echo "llvm-project removed" - rm -r llvm-project - else - echo "llvm not removed" - fi - pwd - ls + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.g5.48xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "12.1" + timeout: 360 + # docker-image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 + script: | + set -x + pushd .. + echo "Installing triton" + git clone https://github.com/triton-lang/triton.git + pushd triton + echo "Checking out triton branch or commit" + git checkout ${{ github.event.inputs.triton_pin || 'main' }} + sudo yum install -y zlib-devel + echo "Installing build-time dependencies" + pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 + export llvm_hash=$(cat cmake/llvm-hash.txt) + echo "llvm_hash: $llvm_hash" + pip install -e python + pushd .. + echo "Cloning pytorch" + git clone https://github.com/pytorch/pytorch.git + pushd pytorch + echo "Checking out pytorch branch or commit" + git checkout ${{ github.event.inputs.pytorch_pin || 'main' }} + git submodule sync + git submodule update --init --recursive + pip install -r requirements.txt + pip install mkl-static mkl-include pytest pytest-xdist + echo "Installing magma-cuda121" + conda install -y -c pytorch magma-cuda121 + python setup.py install + pip freeze + pytest -n 1 test/inductor/test_torchinductor.py From 47a17ad43237f0b8d7d2311c6b5e843564ec2c15 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:17:59 -0700 Subject: [PATCH 34/37] moving continue on error --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index d1b5f35..10a6fea 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -16,9 +16,9 @@ on: jobs: build-test: - continue-on-error: true uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: + continue-on-error: true runner: linux.g5.48xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: "12.1" From e94ea4b4ace6e4db708672ec9301658a77b74214 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:24:52 -0700 Subject: [PATCH 35/37] moving to non editable triton --- .github/workflows/inductor.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 10a6fea..4bf68d0 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -37,7 +37,9 @@ jobs: pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 export llvm_hash=$(cat cmake/llvm-hash.txt) echo "llvm_hash: $llvm_hash" - pip install -e python + pushd python + pip install . + pushd .. pushd .. echo "Cloning pytorch" git clone https://github.com/pytorch/pytorch.git From 9740dca69df9982e4b9861d0baccfc6100060c8a Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:34:30 -0700 Subject: [PATCH 36/37] only change im making is running on pushes to main instead of just new pins --- .github/workflows/inductor.yml | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4bf68d0..d8e4e68 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -1,8 +1,8 @@ name: Test build/test linux gpu on: - pull_request: - branches: [ main, "*"] #will remove this once we see everything is working fine + push: + branches: [ main ] workflow_dispatch: inputs: triton_pin: @@ -18,7 +18,6 @@ jobs: build-test: uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: - continue-on-error: true runner: linux.g5.48xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: "12.1" @@ -37,10 +36,24 @@ jobs: pip install ninja==1.11.1.1 cmake==3.30.2 wheel==0.44.0 export llvm_hash=$(cat cmake/llvm-hash.txt) echo "llvm_hash: $llvm_hash" - pushd python - pip install . - pushd .. pushd .. + echo "Cloning llvm-project" + git clone https://github.com/llvm/llvm-project.git + pushd llvm-project + echo "Checking out llvm hash" + git checkout "$llvm_hash" + mkdir build + pushd build + echo "Building llvm" + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" + ninja + export LLVM_BUILD_DIR=$(pwd) + popd + popd + popd + LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install -e python + echo "Installing triton python package" + popd echo "Cloning pytorch" git clone https://github.com/pytorch/pytorch.git pushd pytorch @@ -53,5 +66,4 @@ jobs: echo "Installing magma-cuda121" conda install -y -c pytorch magma-cuda121 python setup.py install - pip freeze pytest -n 1 test/inductor/test_torchinductor.py From f4b7c2f0ff58d2ae48783176f091ef6eb94fd770 Mon Sep 17 00:00:00 2001 From: adamomainz Date: Fri, 6 Sep 2024 11:35:29 -0700 Subject: [PATCH 37/37] main and master --- .github/workflows/inductor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index d8e4e68..0bbe4e1 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -2,7 +2,7 @@ name: Test build/test linux gpu on: push: - branches: [ main ] + branches: [ master, main ] workflow_dispatch: inputs: triton_pin: