Skip to content

Commit 1365ae8

Browse files
jithunnair-amd, jeffdaily
authored and committed
[ROCm][CI] upgrade CI to ROCm 6.3 (pytorch#142152)
Pull Request resolved: pytorch#142152
Approved by: https://github.com/jeffdaily, https://github.com/pruthvistony
Co-authored-by: Jeff Daily <jeff.daily@amd.com>
1 parent b0be30d commit 1365ae8

File tree

9 files changed

+71
-52
lines changed

9 files changed

+71
-52
lines changed

.ci/docker/build.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ case "$image" in
268268
PROTOBUF=yes
269269
DB=yes
270270
VISION=yes
271-
ROCM_VERSION=6.1
271+
ROCM_VERSION=6.2.4
272272
NINJA_VERSION=1.9.0
273273
CONDA_CMAKE=yes
274274
TRITON=yes
@@ -279,7 +279,7 @@ case "$image" in
279279
PROTOBUF=yes
280280
DB=yes
281281
VISION=yes
282-
ROCM_VERSION=6.2.4
282+
ROCM_VERSION=6.3
283283
NINJA_VERSION=1.9.0
284284
CONDA_CMAKE=yes
285285
TRITON=yes

.ci/docker/common/install_rocm.sh

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,22 @@ install_ubuntu() {
6262
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
6363
done
6464

65+
# ROCm 6.3 had a regression where initializing static code objects had significant overhead
66+
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
67+
# clr build needs CppHeaderParser but can only find it using conda's python
68+
/opt/conda/bin/python -m pip install CppHeaderParser
69+
git clone https://github.com/ROCm/HIP -b rocm-6.3.x
70+
HIP_COMMON_DIR=$(readlink -f HIP)
71+
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
72+
mkdir -p clr/build
73+
pushd clr/build
74+
cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
75+
make -j
76+
cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
77+
popd
78+
rm -rf HIP clr
79+
fi
80+
6581
# Cleanup
6682
apt-get autoclean && apt-get clean
6783
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

.github/workflows/inductor-rocm.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,13 @@ jobs:
2929
curr_branch: ${{ github.head_ref || github.ref_name }}
3030
curr_ref_type: ${{ github.ref_type }}
3131

32-
linux-focal-rocm6_2-py3_10-inductor-build:
33-
name: rocm6.2-py3.10-inductor
32+
linux-focal-rocm6_3-py3_10-inductor-build:
33+
name: rocm6.3-py3.10-inductor
3434
uses: ./.github/workflows/_linux-build.yml
3535
needs: get-label-type
3636
with:
3737
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
38-
build-environment: linux-focal-rocm6.2-py3.10
38+
build-environment: linux-focal-rocm6.3-py3.10
3939
docker-image-name: pytorch-linux-focal-rocm-n-py3
4040
test-matrix: |
4141
{ include: [
@@ -44,15 +44,15 @@ jobs:
4444
]}
4545
secrets: inherit
4646

47-
linux-focal-rocm6_2-py3_10-inductor-test:
47+
linux-focal-rocm6_3-py3_10-inductor-test:
4848
permissions:
4949
id-token: write
5050
contents: read
51-
name: rocm6.2-py3.10-inductor
51+
name: rocm6.3-py3.10-inductor
5252
uses: ./.github/workflows/_rocm-test.yml
53-
needs: linux-focal-rocm6_2-py3_10-inductor-build
53+
needs: linux-focal-rocm6_3-py3_10-inductor-build
5454
with:
55-
build-environment: linux-focal-rocm6.2-py3.10
56-
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.docker-image }}
57-
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.test-matrix }}
55+
build-environment: linux-focal-rocm6.3-py3.10
56+
docker-image: ${{ needs.linux-focal-rocm6_3-py3_10-inductor-build.outputs.docker-image }}
57+
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-inductor-build.outputs.test-matrix }}
5858
secrets: inherit

.github/workflows/periodic.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -139,13 +139,13 @@ jobs:
139139
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-debug-build.outputs.test-matrix }}
140140
secrets: inherit
141141

142-
linux-focal-rocm6_2-py3_10-build:
143-
name: linux-focal-rocm6.2-py3.10
142+
linux-focal-rocm6_3-py3_10-build:
143+
name: linux-focal-rocm6.3-py3.10
144144
uses: ./.github/workflows/_linux-build.yml
145145
needs: get-label-type
146146
with:
147147
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
148-
build-environment: linux-focal-rocm6.2-py3.10
148+
build-environment: linux-focal-rocm6.3-py3.10
149149
docker-image-name: pytorch-linux-focal-rocm-n-py3
150150
test-matrix: |
151151
{ include: [
@@ -155,19 +155,19 @@ jobs:
155155
]}
156156
secrets: inherit
157157

158-
linux-focal-rocm6_2-py3_10-test:
158+
linux-focal-rocm6_3-py3_10-test:
159159
permissions:
160160
id-token: write
161161
contents: read
162-
name: linux-focal-rocm6.2-py3.10
162+
name: linux-focal-rocm6.3-py3.10
163163
uses: ./.github/workflows/_rocm-test.yml
164164
needs:
165-
- linux-focal-rocm6_2-py3_10-build
165+
- linux-focal-rocm6_3-py3_10-build
166166
- target-determination
167167
with:
168-
build-environment: linux-focal-rocm6.2-py3.10
169-
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
170-
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
168+
build-environment: linux-focal-rocm6.3-py3.10
169+
docker-image: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.docker-image }}
170+
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
171171
secrets: inherit
172172

173173
linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:

.github/workflows/pull.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -411,15 +411,15 @@ jobs:
411411
]}
412412
secrets: inherit
413413

414-
linux-focal-rocm6_2-py3_10-build:
414+
linux-focal-rocm6_3-py3_10-build:
415415
# don't run build twice on main
416416
if: github.event_name == 'pull_request'
417-
name: linux-focal-rocm6.2-py3.10
417+
name: linux-focal-rocm6.3-py3.10
418418
uses: ./.github/workflows/_linux-build.yml
419419
needs: get-label-type
420420
with:
421421
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
422-
build-environment: linux-focal-rocm6.2-py3.10
422+
build-environment: linux-focal-rocm6.3-py3.10
423423
docker-image-name: pytorch-linux-focal-rocm-n-py3
424424
sync-tag: rocm-build
425425
test-matrix: |

.github/workflows/rocm.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,12 @@ jobs:
2626
id-token: write
2727
contents: read
2828

29-
linux-focal-rocm6_2-py3_10-build:
29+
linux-focal-rocm6_3-py3_10-build:
3030
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
31-
name: linux-focal-rocm6.2-py3.10
31+
name: linux-focal-rocm6.3-py3.10
3232
uses: ./.github/workflows/_linux-build.yml
3333
with:
34-
build-environment: linux-focal-rocm6.2-py3.10
34+
build-environment: linux-focal-rocm6.3-py3.10
3535
docker-image-name: pytorch-linux-focal-rocm-n-py3
3636
sync-tag: rocm-build
3737
test-matrix: |
@@ -45,17 +45,17 @@ jobs:
4545
]}
4646
secrets: inherit
4747

48-
linux-focal-rocm6_2-py3_10-test:
48+
linux-focal-rocm6_3-py3_10-test:
4949
permissions:
5050
id-token: write
5151
contents: read
52-
name: linux-focal-rocm6.2-py3.10
52+
name: linux-focal-rocm6.3-py3.10
5353
uses: ./.github/workflows/_rocm-test.yml
5454
needs:
55-
- linux-focal-rocm6_2-py3_10-build
55+
- linux-focal-rocm6_3-py3_10-build
5656
- target-determination
5757
with:
58-
build-environment: linux-focal-rocm6.2-py3.10
59-
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
60-
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
58+
build-environment: linux-focal-rocm6.3-py3.10
59+
docker-image: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.docker-image }}
60+
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
6161
secrets: inherit

.github/workflows/slow.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -103,13 +103,13 @@ jobs:
103103
test-matrix: ${{ needs.linux-focal-py3_9-clang10-build.outputs.test-matrix }}
104104
secrets: inherit
105105

106-
linux-focal-rocm6_2-py3_10-build:
107-
name: linux-focal-rocm6.2-py3.10
106+
linux-focal-rocm6_3-py3_10-build:
107+
name: linux-focal-rocm6.3-py3.10
108108
uses: ./.github/workflows/_linux-build.yml
109109
needs: get-label-type
110110
with:
111111
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
112-
build-environment: linux-focal-rocm6.2-py3.10
112+
build-environment: linux-focal-rocm6.3-py3.10
113113
docker-image-name: pytorch-linux-focal-rocm-n-py3
114114
test-matrix: |
115115
{ include: [
@@ -118,19 +118,19 @@ jobs:
118118
]}
119119
secrets: inherit
120120

121-
linux-focal-rocm6_2-py3_10-test:
121+
linux-focal-rocm6_3-py3_10-test:
122122
permissions:
123123
id-token: write
124124
contents: read
125-
name: linux-focal-rocm6.2-py3.10
125+
name: linux-focal-rocm6.3-py3.10
126126
uses: ./.github/workflows/_rocm-test.yml
127127
needs:
128-
- linux-focal-rocm6_2-py3_10-build
128+
- linux-focal-rocm6_3-py3_10-build
129129
- target-determination
130130
with:
131-
build-environment: linux-focal-rocm6.2-py3.10
132-
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
133-
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
131+
build-environment: linux-focal-rocm6.3-py3.10
132+
docker-image: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.docker-image }}
133+
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
134134
secrets: inherit
135135

136136
linux-jammy-py3_10-clang15-asan-build:

.github/workflows/trunk.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -164,13 +164,13 @@ jobs:
164164
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
165165
secrets: inherit
166166

167-
linux-focal-rocm6_2-py3_10-build:
168-
name: linux-focal-rocm6.2-py3.10
167+
linux-focal-rocm6_3-py3_10-build:
168+
name: linux-focal-rocm6.3-py3.10
169169
uses: ./.github/workflows/_linux-build.yml
170170
needs: get-label-type
171171
with:
172172
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
173-
build-environment: linux-focal-rocm6.2-py3.10
173+
build-environment: linux-focal-rocm6.3-py3.10
174174
docker-image-name: pytorch-linux-focal-rocm-n-py3
175175
sync-tag: rocm-build
176176
test-matrix: |
@@ -181,19 +181,19 @@ jobs:
181181
]}
182182
secrets: inherit
183183

184-
linux-focal-rocm6_2-py3_10-test:
184+
linux-focal-rocm6_3-py3_10-test:
185185
permissions:
186186
id-token: write
187187
contents: read
188-
name: linux-focal-rocm6.2-py3.10
188+
name: linux-focal-rocm6.3-py3.10
189189
uses: ./.github/workflows/_rocm-test.yml
190190
needs:
191-
- linux-focal-rocm6_2-py3_10-build
191+
- linux-focal-rocm6_3-py3_10-build
192192
- target-determination
193193
with:
194-
build-environment: linux-focal-rocm6.2-py3.10
195-
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
196-
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
194+
build-environment: linux-focal-rocm6.3-py3.10
195+
docker-image: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.docker-image }}
196+
test-matrix: ${{ needs.linux-focal-rocm6_3-py3_10-build.outputs.test-matrix }}
197197
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
198198
secrets: inherit
199199

test/cpp/api/rnn.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -499,6 +499,8 @@ TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
499499
// Reverse forward of bidirectional LSTM should act
500500
// as regular forward of unidirectional LSTM
501501
void BidirectionalLSTMReverseForwardTest(bool cuda) {
502+
// ROCm 6.3 had a regression in RNN behavior requiring ASSERT_NEAR
503+
constexpr auto tolerance = 1e-5;
502504
auto opt = torch::TensorOptions()
503505
.dtype(torch::kFloat32)
504506
.requires_grad(false)
@@ -532,9 +534,10 @@ void BidirectionalLSTMReverseForwardTest(bool cuda) {
532534
std::get<0>(bi_output).size(0), std::get<0>(reverse_output).size(0));
533535
auto size = std::get<0>(bi_output).size(0);
534536
for (int i = 0; i < size; i++) {
535-
ASSERT_EQ(
537+
ASSERT_NEAR(
536538
std::get<0>(bi_output)[i][0][1].item<float>(),
537-
std::get<0>(reverse_output)[size - 1 - i][0][0].item<float>());
539+
std::get<0>(reverse_output)[size - 1 - i][0][0].item<float>(),
540+
tolerance);
538541
}
539542
// The hidden states of the reversed LSTM sits
540543
// in the odd indices in the first dimension.

0 commit comments

Comments
 (0)