Skip to content

Commit e1c06c3

Browse files
PeddleSpam (Leon Clark)
and
Leon Clark
authored
[AMDGPU] Fix error in #88512. (#92770)
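Fixes an error in the GlobalISel CTLZ_ZERO_UNDEF lowering caused by [#88512](#88512): for sources narrower than 32 bits, the any-extended value must be shifted left (G_SHL) by `32 - NumBits`, not logically right (G_LSHR), so that the source's most significant bit is moved to bit 31 before G_AMDGPU_FFBH_U32 is applied.

---------

Co-authored-by: Leon Clark <[email protected]>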
1 parent 1eb7f05 commit e1c06c3

File tree

3 files changed

+57
-37
lines changed

3 files changed

+57
-37
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4168,7 +4168,7 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,
41684168

41694169
auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
41704170
auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
4171-
auto Shift = B.buildLShr(S32, {Extend}, ShiftAmt);
4171+
auto Shift = B.buildShl(S32, Extend, ShiftAmt);
41724172
auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
41734173
B.buildTrunc(Dst, Ctlz);
41744174
MI.eraseFromParent();

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir

+8-8
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ body: |
8282
; CHECK-NEXT: {{ $}}
8383
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
8484
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
85-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
86-
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
85+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
86+
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
8787
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
8888
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
8989
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
@@ -147,10 +147,10 @@ body: |
147147
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
148148
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
149149
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
150-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
151-
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR1]](s32)
152-
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C]](s32)
153-
; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR2]](s32)
150+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
151+
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
152+
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
153+
; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL2]](s32)
154154
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
155155
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
156156
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U321]], [[C1]]
@@ -175,8 +175,8 @@ body: |
175175
; CHECK-NEXT: {{ $}}
176176
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
177177
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
178-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
179-
; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
178+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
179+
; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
180180
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
181181
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FFBH]], [[C1]]
182182
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)

llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll

+48-28
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
377377
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
378378
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
379379
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
380-
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 24
380+
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 24
381381
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
382382
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
383383
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
@@ -452,7 +452,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
452452
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
453453
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
454454
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
455-
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 16
455+
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 16
456456
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
457457
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
458458
; GFX9-GISEL-NEXT: global_store_short v1, v0, s[2:3]
@@ -655,7 +655,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
655655
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
656656
; GFX9-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3]
657657
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
658-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
658+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v1
659+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
659660
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
660661
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
661662
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -760,7 +761,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
760761
; GFX9-GISEL-NEXT: global_load_ubyte v2, v0, s[2:3] offset:1
761762
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
762763
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 8, v1
763-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
764+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v1
765+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
764766
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
765767
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
766768
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -1167,7 +1169,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, p
11671169
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
11681170
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
11691171
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1170-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0
1172+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1173+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
11711174
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
11721175
; GFX9-GISEL-NEXT: s_endpgm
11731176
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1705,8 +1708,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
17051708
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
17061709
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
17071710
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1708-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0
1709-
; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1
1711+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
1712+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
1713+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
17101714
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[2:3]
17111715
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
17121716
; GFX9-GISEL-NEXT: s_endpgm
@@ -2186,7 +2190,7 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
21862190
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
21872191
; GFX9-GISEL: ; %bb.0:
21882192
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2189-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2193+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
21902194
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
21912195
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21922196
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 true)
@@ -2278,7 +2282,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
22782282
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
22792283
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
22802284
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2281-
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 14
2285+
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 14
22822286
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
22832287
; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
22842288
; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
@@ -2317,7 +2321,7 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
23172321
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
23182322
; GFX9-GISEL: ; %bb.0:
23192323
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2320-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2324+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
23212325
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
23222326
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23232327
%ctlz = call i18 @llvm.ctlz.i18(i18 %val, i1 true)
@@ -2355,8 +2359,8 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
23552359
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
23562360
; GFX9-GISEL: ; %bb.0:
23572361
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2359-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 14, v1
2362+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2363+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 14, v1
23602364
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
23612365
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
23622366
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2394,10 +2398,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
23942398
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
23952399
; GFX9-GISEL: ; %bb.0:
23962400
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2398-
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2401+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2402+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2403+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2404+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2405+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
23992406
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2400-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2407+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
24012408
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24022409
%ctlz = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %val, i1 true)
24032410
ret <2 x i16> %ctlz
@@ -2439,11 +2446,15 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
24392446
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
24402447
; GFX9-GISEL: ; %bb.0:
24412448
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2443-
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2449+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2450+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2451+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2452+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2453+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2454+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
24442455
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2445-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2446-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2456+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2457+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
24472458
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24482459
%ctlz = call <3 x i16> @llvm.ctlz.v3i16(<3 x i16> %val, i1 true)
24492460
ret <3 x i16> %ctlz
@@ -2492,13 +2503,20 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
24922503
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
24932504
; GFX9-GISEL: ; %bb.0:
24942505
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2495-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2496-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2497-
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2506+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2507+
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2508+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2509+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2510+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2511+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2512+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2513+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2514+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2515+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
24982516
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
24992517
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2500-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
2501-
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
2518+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
2519+
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
25022520
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25032521
%ctlz = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %val, i1 true)
25042522
ret <4 x i16> %ctlz
@@ -2536,8 +2554,10 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
25362554
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
25372555
; GFX9-GISEL: ; %bb.0:
25382556
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2540-
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2557+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2558+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2559+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2560+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
25412561
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25422562
%ctlz = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %val, i1 true)
25432563
ret <2 x i8> %ctlz
@@ -2579,8 +2599,8 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
25792599
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
25802600
; GFX9-GISEL: ; %bb.0:
25812601
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2582-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2583-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 25, v1
2602+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2603+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 25, v1
25842604
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
25852605
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
25862606
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)