1- ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+ ; RUN: llc -global-isel=0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-SDAG %s
3+ ; RUN: llc -global-isel=1 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-GISEL %s
24
; Intrinsic declarations referenced by the kernels in this test.
; llvm.fabs.f16 takes one half operand and returns a half.
35declare half @llvm.fabs.f16 (half %a )
; llvm.amdgcn.class.f16 takes a half and an i32 and returns an i1.
; NOTE(review) the i32 is presumably the class-test mask (the checks feed
; constants such as 0x3ff and 0x1ff as the second v_cmp_class_f16 operand);
; confirm against the AMDGPU intrinsic documentation.
46declare i1 @llvm.amdgcn.class.f16 (half %a , i32 %b )
57
6- ; GCN-LABEL: {{^}}class_f16:
7- ; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
8- ; GCN-DAG: buffer_load_dword v[[B_I32:[0-9]+]]
9- ; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
10- ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
11- ; GCN: buffer_store_dword v[[R_I32]]
12- ; GCN: s_endpgm
138define amdgpu_kernel void @class_f16 (
9+ ; VI-SDAG-LABEL: class_f16:
10+ ; VI-SDAG: ; %bb.0: ; %entry
11+ ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
12+ ; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x10
13+ ; VI-SDAG-NEXT: s_mov_b32 s7, 0x1100f000
14+ ; VI-SDAG-NEXT: s_mov_b32 s6, -1
15+ ; VI-SDAG-NEXT: s_mov_b32 s10, s6
16+ ; VI-SDAG-NEXT: s_mov_b32 s11, s7
17+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
18+ ; VI-SDAG-NEXT: s_mov_b32 s12, s2
19+ ; VI-SDAG-NEXT: s_mov_b32 s13, s3
20+ ; VI-SDAG-NEXT: s_mov_b32 s14, s6
21+ ; VI-SDAG-NEXT: s_mov_b32 s15, s7
22+ ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
23+ ; VI-SDAG-NEXT: buffer_load_ushort v1, off, s[12:15], 0
24+ ; VI-SDAG-NEXT: s_mov_b32 s4, s0
25+ ; VI-SDAG-NEXT: s_mov_b32 s5, s1
26+ ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
27+ ; VI-SDAG-NEXT: v_cmp_class_f16_e32 vcc, v1, v0
28+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
29+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
30+ ; VI-SDAG-NEXT: s_endpgm
31+ ;
32+ ; VI-GISEL-LABEL: class_f16:
33+ ; VI-GISEL: ; %bb.0: ; %entry
34+ ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
35+ ; VI-GISEL-NEXT: s_mov_b32 s6, -1
36+ ; VI-GISEL-NEXT: s_mov_b32 s7, 0x1100f000
37+ ; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x10
38+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
39+ ; VI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
40+ ; VI-GISEL-NEXT: buffer_load_ushort v0, off, s[4:7], 0
41+ ; VI-GISEL-NEXT: s_load_dword s2, s[8:9], 0x0
42+ ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
43+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[2:3], v0, s2
44+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
45+ ; VI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
46+ ; VI-GISEL-NEXT: s_nop 2
47+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
48+ ; VI-GISEL-NEXT: s_endpgm
1449 ptr addrspace (1 ) %r ,
1550 ptr addrspace (1 ) %a ,
1651 ptr addrspace (1 ) %b ) {
@@ -23,15 +58,34 @@ entry:
2358 ret void
2459}
2560
26- ; GCN-LABEL: {{^}}class_f16_fabs:
27- ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
28- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
29- ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
30- ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |s[[SA_F16]]|, [[V_B_I32]]
31- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
32- ; GCN: buffer_store_dword v[[VR_I32]]
33- ; GCN: s_endpgm
3461define amdgpu_kernel void @class_f16_fabs (
62+ ; VI-SDAG-LABEL: class_f16_fabs:
63+ ; VI-SDAG: ; %bb.0: ; %entry
64+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x4c
65+ ; VI-SDAG-NEXT: s_load_dword s5, s[8:9], 0x28
66+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
67+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
68+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
69+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
70+ ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
71+ ; VI-SDAG-NEXT: v_cmp_class_f16_e64 s[4:5], |s5|, v0
72+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
73+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
74+ ; VI-SDAG-NEXT: s_endpgm
75+ ;
76+ ; VI-GISEL-LABEL: class_f16_fabs:
77+ ; VI-GISEL: ; %bb.0: ; %entry
78+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x28
79+ ; VI-GISEL-NEXT: s_load_dword s4, s[8:9], 0x4c
80+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
81+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
82+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
83+ ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
84+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], |v0|, s4
85+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
86+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
87+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
88+ ; VI-GISEL-NEXT: s_endpgm
3589 ptr addrspace (1 ) %r ,
3690 [8 x i32 ],
3791 half %a.val ,
@@ -45,15 +99,34 @@ entry:
4599 ret void
46100}
47101
48- ; GCN-LABEL: {{^}}class_f16_fneg:
49- ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
50- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
51- ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
52- ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -s[[SA_F16]], [[V_B_I32]]
53- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
54- ; GCN: buffer_store_dword v[[VR_I32]]
55- ; GCN: s_endpgm
56102define amdgpu_kernel void @class_f16_fneg (
103+ ; VI-SDAG-LABEL: class_f16_fneg:
104+ ; VI-SDAG: ; %bb.0: ; %entry
105+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x4c
106+ ; VI-SDAG-NEXT: s_load_dword s5, s[8:9], 0x28
107+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
108+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
109+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
110+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
111+ ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
112+ ; VI-SDAG-NEXT: v_cmp_class_f16_e64 s[4:5], -s5, v0
113+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
114+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
115+ ; VI-SDAG-NEXT: s_endpgm
116+ ;
117+ ; VI-GISEL-LABEL: class_f16_fneg:
118+ ; VI-GISEL: ; %bb.0: ; %entry
119+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x28
120+ ; VI-GISEL-NEXT: s_load_dword s4, s[8:9], 0x4c
121+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
122+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
123+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
124+ ; VI-GISEL-NEXT: v_max_f16_e64 v0, -s3, -s3
125+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s4
126+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
127+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
128+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
129+ ; VI-GISEL-NEXT: s_endpgm
57130 ptr addrspace (1 ) %r ,
58131 [8 x i32 ],
59132 half %a.val ,
@@ -67,15 +140,34 @@ entry:
67140 ret void
68141}
69142
70- ; GCN-LABEL: {{^}}class_f16_fabs_fneg:
71- ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
72- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
73- ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
74- ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|s[[SA_F16]]|, [[V_B_I32]]
75- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
76- ; GCN: buffer_store_dword v[[VR_I32]]
77- ; GCN: s_endpgm
78143define amdgpu_kernel void @class_f16_fabs_fneg (
144+ ; VI-SDAG-LABEL: class_f16_fabs_fneg:
145+ ; VI-SDAG: ; %bb.0: ; %entry
146+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x4c
147+ ; VI-SDAG-NEXT: s_load_dword s5, s[8:9], 0x28
148+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
149+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
150+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
151+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
152+ ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
153+ ; VI-SDAG-NEXT: v_cmp_class_f16_e64 s[4:5], -|s5|, v0
154+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
155+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
156+ ; VI-SDAG-NEXT: s_endpgm
157+ ;
158+ ; VI-GISEL-LABEL: class_f16_fabs_fneg:
159+ ; VI-GISEL: ; %bb.0: ; %entry
160+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x28
161+ ; VI-GISEL-NEXT: s_load_dword s4, s[8:9], 0x4c
162+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
163+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
164+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
165+ ; VI-GISEL-NEXT: v_max_f16_e64 v0, -|s3|, -|s3|
166+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s4
167+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
168+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
169+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
170+ ; VI-GISEL-NEXT: s_endpgm
79171 ptr addrspace (1 ) %r ,
80172 [8 x i32 ],
81173 half %a.val ,
@@ -90,13 +182,30 @@ entry:
90182 ret void
91183}
92184
93- ; GCN-LABEL: {{^}}class_f16_1:
94- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
95- ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 1{{$}}
96- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
97- ; GCN: buffer_store_dword v[[VR_I32]]
98- ; GCN: s_endpgm
99185define amdgpu_kernel void @class_f16_1 (
186+ ; VI-SDAG-LABEL: class_f16_1:
187+ ; VI-SDAG: ; %bb.0: ; %entry
188+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
189+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
190+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
191+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
192+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
193+ ; VI-SDAG-NEXT: v_cmp_class_f16_e64 s[4:5], s4, 1
194+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
195+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
196+ ; VI-SDAG-NEXT: s_endpgm
197+ ;
198+ ; VI-GISEL-LABEL: class_f16_1:
199+ ; VI-GISEL: ; %bb.0: ; %entry
200+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x8
201+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
202+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
203+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
204+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], s3, 1
205+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
206+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
207+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
208+ ; VI-GISEL-NEXT: s_endpgm
100209 ptr addrspace (1 ) %r ,
101210 half %a.val ) {
102211entry:
@@ -106,13 +215,30 @@ entry:
106215 ret void
107216}
108217
109- ; GCN-LABEL: {{^}}class_f16_64
110- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
111- ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 64{{$}}
112- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
113- ; GCN: buffer_store_dword v[[VR_I32]]
114- ; GCN: s_endpgm
115218define amdgpu_kernel void @class_f16_64 (
219+ ; VI-SDAG-LABEL: class_f16_64:
220+ ; VI-SDAG: ; %bb.0: ; %entry
221+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
222+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
223+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
224+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
225+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
226+ ; VI-SDAG-NEXT: v_cmp_class_f16_e64 s[4:5], s4, 64
227+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
228+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
229+ ; VI-SDAG-NEXT: s_endpgm
230+ ;
231+ ; VI-GISEL-LABEL: class_f16_64:
232+ ; VI-GISEL: ; %bb.0: ; %entry
233+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x8
234+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
235+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
236+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
237+ ; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], s3, 64
238+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
239+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
240+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
241+ ; VI-GISEL-NEXT: s_endpgm
116242 ptr addrspace (1 ) %r ,
117243 half %a.val ) {
118244entry:
@@ -122,14 +248,32 @@ entry:
122248 ret void
123249}
124250
125- ; GCN-LABEL: {{^}}class_f16_full_mask:
126- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
127- ; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
128- ; VI: v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
129- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
130- ; GCN: buffer_store_dword v[[VR_I32]]
131- ; GCN: s_endpgm
132251define amdgpu_kernel void @class_f16_full_mask (
252+ ; VI-SDAG-LABEL: class_f16_full_mask:
253+ ; VI-SDAG: ; %bb.0: ; %entry
254+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
255+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
256+ ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3ff
257+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
258+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
259+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
260+ ; VI-SDAG-NEXT: v_cmp_class_f16_e32 vcc, s4, v0
261+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
262+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
263+ ; VI-SDAG-NEXT: s_endpgm
264+ ;
265+ ; VI-GISEL-LABEL: class_f16_full_mask:
266+ ; VI-GISEL: ; %bb.0: ; %entry
267+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x8
268+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
269+ ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
270+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
271+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
272+ ; VI-GISEL-NEXT: v_cmp_class_f16_e32 vcc, s3, v0
273+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
274+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
275+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
276+ ; VI-GISEL-NEXT: s_endpgm
133277 ptr addrspace (1 ) %r ,
134278 half %a.val ) {
135279entry:
@@ -139,14 +283,32 @@ entry:
139283 ret void
140284}
141285
142- ; GCN-LABEL: {{^}}class_f16_nine_bit_mask:
143- ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
144- ; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
145- ; VI: v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
146- ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
147- ; GCN: buffer_store_dword v[[VR_I32]]
148- ; GCN: s_endpgm
149286define amdgpu_kernel void @class_f16_nine_bit_mask (
287+ ; VI-SDAG-LABEL: class_f16_nine_bit_mask:
288+ ; VI-SDAG: ; %bb.0: ; %entry
289+ ; VI-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
290+ ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
291+ ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x1ff
292+ ; VI-SDAG-NEXT: s_mov_b32 s3, 0x1100f000
293+ ; VI-SDAG-NEXT: s_mov_b32 s2, -1
294+ ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
295+ ; VI-SDAG-NEXT: v_cmp_class_f16_e32 vcc, s4, v0
296+ ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
297+ ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
298+ ; VI-SDAG-NEXT: s_endpgm
299+ ;
300+ ; VI-GISEL-LABEL: class_f16_nine_bit_mask:
301+ ; VI-GISEL: ; %bb.0: ; %entry
302+ ; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x8
303+ ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
304+ ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x1ff
305+ ; VI-GISEL-NEXT: s_mov_b32 s2, -1
306+ ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
307+ ; VI-GISEL-NEXT: v_cmp_class_f16_e32 vcc, s3, v0
308+ ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
309+ ; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
310+ ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
311+ ; VI-GISEL-NEXT: s_endpgm
150312 ptr addrspace (1 ) %r ,
151313 half %a.val ) {
152314entry:
@@ -155,3 +317,5 @@ entry:
155317 store i32 %r.val.sext , ptr addrspace (1 ) %r
156318 ret void
157319}
320+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
321+ ; GCN: {{.*}}
0 commit comments