Skip to content

Commit e484bea

Browse files
authored
[libspirv] Use the remangler to mangle half types (#18083)
This commit revisits the way we achieve the desired host-side mangling of `half` types as `DF16_` (from the `_Float16` type) coming from the original mangling of the `Dh` type that OpenCL produces. We were previously manually achieving this by writing over a thousand wrapper functions from `_Float16` types to `half` types. The remangler can just as easily do this for us, while reducing the total source code line count by over 8000. This work was originally spurred on by commit 316418d, which modified the AMDGCN sub-group shuffle builtins. In doing so it broke them in a couple of ways. The first was a typo that hid the SubgroupShuffleXorINTEL builtins for sub-32-bit types by omitting the 'INTEL' part of the builtin name. That was easily fixed by adjusting the builtin name. The second was an issue of providing builtins using the OpenCL 'half' data type, without the `_Float16` -> `half` wrappers which we relied on. This accidentally stopped the SYCL host being able to find the right symbols.
1 parent 82b0380 commit e484bea

File tree

10 files changed

+151
-8628
lines changed

10 files changed

+151
-8628
lines changed

libclc/generic/include/as_type.h

-9
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,4 @@
8585
#define as_half16(x) __builtin_astype(x, half16)
8686
#endif
8787

88-
#ifdef __CLC_HAS_FLOAT16
89-
#define as_float16_t(x) __builtin_astype(x, __clc_float16_t)
90-
#define as_vec2_float16_t(x) __builtin_astype(x, __clc_vec2_float16_t)
91-
#define as_vec3_float16_t(x) __builtin_astype(x, __clc_vec3_float16_t)
92-
#define as_vec4_float16_t(x) __builtin_astype(x, __clc_vec4_float16_t)
93-
#define as_vec8_float16_t(x) __builtin_astype(x, __clc_vec8_float16_t)
94-
#define as_vec16_float16_t(x) __builtin_astype(x, __clc_vec16_float16_t)
95-
#endif
96-
9788
#endif // CLC_AS_TYPE

libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl

+1-13
Original file line numberDiff line numberDiff line change
@@ -316,18 +316,6 @@ __CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
316316
__CLC_GROUP_COLLECTIVE(LogicalOrKHR, __CLC_LOGICAL_OR, bool, false)
317317
__CLC_GROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)
318318

319-
// half requires additional mangled entry points
320-
#define __CLC_GROUP_COLLECTIVE__DF16(MANGLED_NAME, SPIRV_DISPATCH) \
321-
_CLC_DEF _CLC_CONVERGENT half MANGLED_NAME(int scope, uint op, half x) { \
322-
return SPIRV_DISPATCH(scope, op, x); \
323-
}
324-
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFAddiiDF16_, __spirv_GroupFAdd)
325-
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFMiniiDF16_, __spirv_GroupFMin)
326-
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFMaxiiDF16_, __spirv_GroupFMax)
327-
__CLC_GROUP_COLLECTIVE__DF16(_Z20__spirv_GroupFMulKHRiiDF16_,
328-
__spirv_GroupFMulKHR)
329-
#undef __CLC_GROUP_COLLECTIVE__DF16
330-
331319
#undef __CLC_GROUP_COLLECTIVE_4
332320
#undef __CLC_GROUP_COLLECTIVE_5
333321
#undef DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO
@@ -391,7 +379,7 @@ __CLC_GROUP_BROADCAST(int, i)
391379
__CLC_GROUP_BROADCAST(uint, j)
392380
__CLC_GROUP_BROADCAST(long, l)
393381
__CLC_GROUP_BROADCAST(ulong, m)
394-
__CLC_GROUP_BROADCAST(half, DF16_)
382+
__CLC_GROUP_BROADCAST(half, Dh)
395383
__CLC_GROUP_BROADCAST(float, f)
396384
__CLC_GROUP_BROADCAST(double, d)
397385

libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl

+27-27
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,17 @@ _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i)
9494
// Half
9595
#ifdef cl_khr_fp16
9696
// return 1-channel color data
97-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i)
98-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i)
99-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half, DF16_, int3, Dv3_i)
97+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half, Dh, int, i)
98+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half, Dh, int2, Dv2_i)
99+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half, Dh, int3, Dv3_i)
100100
// return 2-channel color data
101-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i)
102-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i)
103-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i)
101+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_Dh, int, i)
102+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_Dh, int2, Dv2_i)
103+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half2, Dv2_Dh, int3, Dv3_i)
104104
// return 4-channel color data
105-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i)
106-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i)
107-
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i)
105+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_Dh, int, i)
106+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_Dh, int2, Dv2_i)
107+
_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half4, Dv4_Dh, int3, Dv3_i)
108108
#endif
109109

110110
// Int
@@ -243,17 +243,17 @@ _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i)
243243
// Half
244244
#ifdef cl_khr_fp16
245245
// write 1-channel color data
246-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i)
247-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i)
248-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half, DF16_, int3, Dv3_i)
246+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half, Dh, int, i)
247+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half, Dh, int2, Dv2_i)
248+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half, Dh, int3, Dv3_i)
249249
// write 2-channel color data
250-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i)
251-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i)
252-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i)
250+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_Dh, int, i)
251+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_Dh, int2, Dv2_i)
252+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half2, Dv2_Dh, int3, Dv3_i)
253253
// write 4-channel color data
254-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i)
255-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i)
256-
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i)
254+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_Dh, int, i)
255+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_Dh, int2, Dv2_i)
256+
_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half4, Dv4_Dh, int3, Dv3_i)
257257
#endif
258258

259259
// Int
@@ -401,20 +401,20 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float4, Dv4_f, float3,
401401
// Half
402402
#ifdef cl_khr_fp16
403403
// return 1 channel color data
404-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, f)
405-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f)
406-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half, DF16_, float3, Dv3_f)
404+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half, Dh, float, f)
405+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half, Dh, float2, Dv2_f)
406+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half, Dh, float3, Dv3_f)
407407
// return 2-channel color data
408-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f)
409-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2,
408+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_Dh, float, f)
409+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_Dh, float2,
410410
Dv2_f)
411-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_DF16_, float3,
411+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_Dh, float3,
412412
Dv3_f)
413413
// return 4-channel color data
414-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f)
415-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2,
414+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_Dh, float, f)
415+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_Dh, float2,
416416
Dv2_f)
417-
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_DF16_, float3,
417+
_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_Dh, float3,
418418
Dv3_f)
419419
#endif
420420

libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl

+18-18
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,18 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i,
8888
// Half
8989
#ifdef cl_khr_fp16
9090
// return 1-channel color data
91-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i, 2)
92-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i,
91+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, Dh, int, i, 2)
92+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, Dh, int2, Dv2_i,
9393
4)
9494
// return 2-channel color data
95-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i,
95+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_Dh, int, i,
9696
2)
97-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2,
97+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_Dh, int2,
9898
Dv2_i, 4)
9999
// return 4-channel color data
100-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i,
100+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_Dh, int, i,
101101
2)
102-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2,
102+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_Dh, int2,
103103
Dv2_i, 4)
104104
#endif
105105

@@ -237,18 +237,18 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i,
237237
// Half
238238
#ifdef cl_khr_fp16
239239
// write 1-channel color data
240-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i, 2)
241-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i,
240+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, Dh, int, i, 2)
241+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, Dh, int2, Dv2_i,
242242
4)
243243
// write 2-channel color data
244-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i,
244+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_Dh, int, i,
245245
2)
246-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2,
246+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_Dh, int2,
247247
Dv2_i, 4)
248248
// write 4-channel color data
249-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i,
249+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_Dh, int, i,
250250
2)
251-
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2,
251+
_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_Dh, int2,
252252
Dv2_i, 4)
253253
#endif
254254

@@ -395,19 +395,19 @@ _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f,
395395
// Half
396396
#ifdef cl_khr_fp16
397397
// return 1 channel color data
398-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float,
398+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, Dh, float,
399399
f, 2)
400-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2,
400+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, Dh, float2,
401401
Dv2_f, 4)
402402
// return 2 channel color data
403-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_,
403+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_Dh,
404404
float, f, 2)
405-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_,
405+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_Dh,
406406
float2, Dv2_f, 4)
407407
// return 4 channel color data
408-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_,
408+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_Dh,
409409
float, f, 2)
410-
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_,
410+
_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_Dh,
411411
float2, Dv2_f, 4)
412412
#endif
413413

libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl

+31-31
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short);
3939
// _Z28__spirv_SubgroupShuffleINTELIhET_S0_j - unsigned char
4040
// _Z28__spirv_SubgroupShuffleINTELIsET_S0_j - short
4141
// _Z28__spirv_SubgroupShuffleINTELItET_S0_j - unsigned short
42-
// _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j - half
42+
// _Z28__spirv_SubgroupShuffleINTELIDhET_S0_j - half
4343
#define __AMDGCN_CLC_SUBGROUP_SUB_I32(TYPE, MANGLED_TYPE_NAME) \
4444
_CLC_DEF TYPE _Z28__spirv_SubgroupShuffleINTELI##MANGLED_TYPE_NAME##ET_S0_j( \
4545
TYPE Data, unsigned int InvocationId) { \
@@ -58,7 +58,7 @@ __spirv_SubgroupShuffleINTEL(half Data, unsigned int InvocationId) {
5858
tmp = __spirv_SubgroupShuffleINTEL(tmp, InvocationId);
5959
return __clc_as_half(tmp);
6060
}
61-
_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j(
61+
_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDhET_S0_j(
6262
half Data, unsigned int InvocationId) {
6363
return __spirv_SubgroupShuffleINTEL(Data, InvocationId);
6464
}
@@ -227,10 +227,10 @@ __AMDGCN_CLC_SUBGROUP_TO_VEC(ulong8, m, 8)
227227
__AMDGCN_CLC_SUBGROUP_TO_VEC(ulong16, m, 16)
228228
// half
229229
#ifdef cl_khr_fp16
230-
__AMDGCN_CLC_SUBGROUP_TO_VEC(half2, DF16_, 2)
231-
__AMDGCN_CLC_SUBGROUP_TO_VEC(half4, DF16_, 4)
232-
__AMDGCN_CLC_SUBGROUP_TO_VEC(half8, DF16_, 8)
233-
__AMDGCN_CLC_SUBGROUP_TO_VEC(half16, DF16_, 16)
230+
__AMDGCN_CLC_SUBGROUP_TO_VEC(half2, Dh, 2)
231+
__AMDGCN_CLC_SUBGROUP_TO_VEC(half4, Dh, 4)
232+
__AMDGCN_CLC_SUBGROUP_TO_VEC(half8, Dh, 8)
233+
__AMDGCN_CLC_SUBGROUP_TO_VEC(half16, Dh, 16)
234234
#endif // cl_khr_fp16
235235
// float
236236
__AMDGCN_CLC_SUBGROUP_TO_VEC(float2, f, 2)
@@ -271,8 +271,8 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char);
271271
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short);
272272
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short);
273273
#ifdef cl_khr_fp16
274-
_CLC_OVERLOAD _CLC_DEF half
275-
__spirv_SubgroupShuffleXorINTEL(half Data, unsigned int InvocationId) {
274+
_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleXorINTEL(
275+
half Data, unsigned int InvocationId) {
276276
unsigned short tmp = __clc_as_ushort(Data);
277277
tmp = (unsigned short)__spirv_SubgroupShuffleXorINTEL(tmp, InvocationId);
278278
return __clc_as_half(tmp);
@@ -284,7 +284,7 @@ __spirv_SubgroupShuffleXorINTEL(half Data, unsigned int InvocationId) {
284284
// _Z31__spirv_SubgroupShuffleXorINTELIhET_S0_j - unsigned char
285285
// _Z31__spirv_SubgroupShuffleXorINTELIsET_S0_j - short
286286
// _Z31__spirv_SubgroupShuffleXorINTELItET_S0_j - unsigned short
287-
// _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j - half
287+
// _Z31__spirv_SubgroupShuffleXorINTELIDhET_S0_j - half
288288
#define __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(TYPE, MANGLED_TYPE_NAME) \
289289
_CLC_DEF TYPE \
290290
_Z31__spirv_SubgroupShuffleXorINTELI##MANGLED_TYPE_NAME##ET_S0_j( \
@@ -296,7 +296,7 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char, h);
296296
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short, s);
297297
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short, t);
298298
#ifdef cl_khr_fp16
299-
_CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
299+
_CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDhET_S0_j(
300300
half Data, unsigned int InvocationId) {
301301
return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId);
302302
}
@@ -470,10 +470,10 @@ __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float8, f, 8)
470470
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float16, f, 16)
471471
// half
472472
#ifdef cl_khr_fp16
473-
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half2, DF16_, 2)
474-
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half4, DF16_, 4)
475-
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half8, DF16_, 8)
476-
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half16, DF16_, 16)
473+
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half2, Dh, 2)
474+
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half4, Dh, 4)
475+
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half8, Dh, 8)
476+
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half16, Dh, 16)
477477
#endif // cl_khr_fp16
478478
// double
479479
__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double2, d, 2)
@@ -521,11 +521,11 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(char);
521521
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned char);
522522
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short);
523523
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short);
524+
524525
// half
525526
#ifdef cl_khr_fp16
526-
_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleUpINTEL(half previous,
527-
half current,
528-
unsigned int delta) {
527+
_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleUpINTEL(
528+
half previous, half current, unsigned int delta) {
529529
unsigned short tmpP = __clc_as_ushort(previous);
530530
unsigned short tmpC = __clc_as_ushort(current);
531531
tmpC = __spirv_SubgroupShuffleUpINTEL(tmpP, tmpC, delta);
@@ -538,7 +538,7 @@ _CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleUpINTEL(half previous,
538538
// _Z30__spirv_SubgroupShuffleUpINTELIhET_S0_S0_j - unsigned char
539539
// _Z30__spirv_SubgroupShuffleUpINTELIsET_S0_S0_j - short
540540
// _Z30__spirv_SubgroupShuffleUpINTELItET_S0_S0_j - unsigned short
541-
// _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j - half
541+
// _Z30__spirv_SubgroupShuffleUpINTELIDhET_S0_S0_j - half
542542
#define __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(TYPE, MANGLED_TYPE_NAME) \
543543
_CLC_DEF TYPE \
544544
_Z30__spirv_SubgroupShuffleUpINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \
@@ -551,7 +551,7 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short, s);
551551
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short, t);
552552
// half
553553
#ifdef cl_khr_fp16
554-
_CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j(
554+
_CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDhET_S0_S0_j(
555555
half previous, half current, unsigned int delta) {
556556
return __spirv_SubgroupShuffleUpINTEL(previous, current, delta);
557557
}
@@ -724,10 +724,10 @@ __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong8, m, 8)
724724
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong16, m, 16)
725725
// half
726726
#ifdef cl_khr_fp16
727-
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half2, DF16_, 2)
728-
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half4, DF16_, 4)
729-
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half8, DF16_, 8)
730-
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half16, DF16_, 16)
727+
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half2, Dh, 2)
728+
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half4, Dh, 4)
729+
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half8, Dh, 8)
730+
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half16, Dh, 16)
731731
#endif // cl_khr_fp16
732732
// float
733733
__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float2, f, 2)
@@ -782,8 +782,8 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short);
782782
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short);
783783
// half
784784
#ifdef cl_khr_fp16
785-
_CLC_OVERLOAD _CLC_DEF half
786-
__spirv_SubgroupShuffleDownINTEL(half current, half next, unsigned int delta) {
785+
_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleDownINTEL(
786+
half current, half next, unsigned int delta) {
787787
unsigned short tmpC = __clc_as_ushort(current);
788788
unsigned short tmpN = __clc_as_ushort(next);
789789
tmpC = __spirv_SubgroupShuffleDownINTEL(tmpC, tmpN, delta);
@@ -796,7 +796,7 @@ __spirv_SubgroupShuffleDownINTEL(half current, half next, unsigned int delta) {
796796
// _Z32__spirv_SubgroupShuffleDownINTELIhET_S0_S0_j - unsigned char
797797
// _Z32__spirv_SubgroupShuffleDownINTELIsET_S0_S0_j - short
798798
// _Z32__spirv_SubgroupShuffleDownINTELItET_S0_S0_j - unsigned short
799-
// _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j - half
799+
// _Z32__spirv_SubgroupShuffleDownINTELIDhET_S0_S0_j - half
800800
#define __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(TYPE, MANGLED_TYPE_NAME) \
801801
_CLC_DEF TYPE \
802802
_Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \
@@ -809,7 +809,7 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short, s);
809809
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short, t);
810810
// half
811811
#ifdef cl_khr_fp16
812-
_CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j(
812+
_CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDhET_S0_S0_j(
813813
half current, half next, unsigned int delta) {
814814
return __spirv_SubgroupShuffleDownINTEL(current, next, delta);
815815
}
@@ -980,10 +980,10 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong8, m, 8)
980980
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong16, m, 16)
981981
// half
982982
#ifdef cl_khr_fp16
983-
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half2, DF16_, 2)
984-
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half4, DF16_, 4)
985-
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half8, DF16_, 8)
986-
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half16, DF16_, 16)
983+
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half2, Dh, 2)
984+
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half4, Dh, 4)
985+
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half8, Dh, 8)
986+
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half16, Dh, 16)
987987
#endif // cl_khr_fp16
988988
// float
989989
__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float2, f, 2)

libclc/libspirv/lib/generic/SOURCES

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ atomic/loadstore_helpers_unordered.ll
22
atomic/loadstore_helpers_release.ll
33
atomic/loadstore_helpers_acquire.ll
44
atomic/loadstore_helpers_seq_cst.ll
5-
float16.cl
65
subnormal_config.cl
76
subnormal_helper_func.ll
87
async/async_work_group_strided_copy.cl

0 commit comments

Comments
 (0)