@@ -377,7 +377,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
377
377
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
378
378
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
379
379
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
380
- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 24
380
+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 24
381
381
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
382
382
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
383
383
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
@@ -452,7 +452,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
452
452
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
453
453
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
454
454
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
455
- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 16
455
+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 16
456
456
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
457
457
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
458
458
; GFX9-GISEL-NEXT: global_store_short v1, v0, s[2:3]
@@ -655,7 +655,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
655
655
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
656
656
; GFX9-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3]
657
657
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
658
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
658
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v1
659
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
659
660
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
660
661
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
661
662
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -760,7 +761,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
760
761
; GFX9-GISEL-NEXT: global_load_ubyte v2, v0, s[2:3] offset:1
761
762
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
762
763
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 8, v1
763
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
764
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v1
765
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
764
766
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
765
767
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
766
768
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -1167,7 +1169,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, p
1167
1169
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1168
1170
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1169
1171
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1170
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0
1172
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1173
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
1171
1174
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
1172
1175
; GFX9-GISEL-NEXT: s_endpgm
1173
1176
%tid = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -1705,8 +1708,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
1705
1708
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1706
1709
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1707
1710
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1708
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0
1709
- ; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1
1711
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
1712
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
1713
+ ; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
1710
1714
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[2:3]
1711
1715
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
1712
1716
; GFX9-GISEL-NEXT: s_endpgm
@@ -2186,7 +2190,7 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
2186
2190
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
2187
2191
; GFX9-GISEL: ; %bb.0:
2188
2192
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2189
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2193
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2190
2194
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2191
2195
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2192
2196
%ctlz = call i7 @llvm.ctlz.i7 (i7 %val , i1 true )
@@ -2278,7 +2282,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
2278
2282
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
2279
2283
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
2280
2284
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2281
- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 14
2285
+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 14
2282
2286
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
2283
2287
; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
2284
2288
; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
@@ -2317,7 +2321,7 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
2317
2321
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
2318
2322
; GFX9-GISEL: ; %bb.0:
2319
2323
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2320
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2324
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2321
2325
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2322
2326
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2323
2327
%ctlz = call i18 @llvm.ctlz.i18 (i18 %val , i1 true )
@@ -2355,8 +2359,8 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
2355
2359
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
2356
2360
; GFX9-GISEL: ; %bb.0:
2357
2361
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2359
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 14, v1
2362
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2363
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 14, v1
2360
2364
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2361
2365
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2362
2366
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2394,10 +2398,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
2394
2398
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
2395
2399
; GFX9-GISEL: ; %bb.0:
2396
2400
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2398
- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2401
+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2402
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2403
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2404
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2405
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2399
2406
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2400
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2407
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1 , 16, v0
2401
2408
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2402
2409
%ctlz = call <2 x i16 > @llvm.ctlz.v2i16 (<2 x i16 > %val , i1 true )
2403
2410
ret <2 x i16 > %ctlz
@@ -2439,11 +2446,15 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
2439
2446
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
2440
2447
; GFX9-GISEL: ; %bb.0:
2441
2448
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2443
- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2449
+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2450
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2451
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2452
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2453
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2454
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2444
2455
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2445
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2446
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2456
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2457
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2 , 16, v0
2447
2458
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2448
2459
%ctlz = call <3 x i16 > @llvm.ctlz.v3i16 (<3 x i16 > %val , i1 true )
2449
2460
ret <3 x i16 > %ctlz
@@ -2492,13 +2503,20 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
2492
2503
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
2493
2504
; GFX9-GISEL: ; %bb.0:
2494
2505
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2495
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2496
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2497
- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2506
+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2507
+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2508
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2509
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2510
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2511
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2512
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2513
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2514
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2515
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
2498
2516
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2499
2517
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2500
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2501
- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4 , 16, v1
2518
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2 , 16, v0
2519
+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v3 , 16, v1
2502
2520
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2503
2521
%ctlz = call <4 x i16 > @llvm.ctlz.v4i16 (<4 x i16 > %val , i1 true )
2504
2522
ret <4 x i16 > %ctlz
@@ -2536,8 +2554,10 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
2536
2554
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
2537
2555
; GFX9-GISEL: ; %bb.0:
2538
2556
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2540
- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2557
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2558
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2559
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2560
+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2541
2561
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2542
2562
%ctlz = call <2 x i8 > @llvm.ctlz.v2i8 (<2 x i8 > %val , i1 true )
2543
2563
ret <2 x i8 > %ctlz
@@ -2579,8 +2599,8 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
2579
2599
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
2580
2600
; GFX9-GISEL: ; %bb.0:
2581
2601
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2582
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2583
- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 25, v1
2602
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2603
+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 25, v1
2584
2604
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2585
2605
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2586
2606
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
0 commit comments