Skip to content

Commit 06789cc

Browse files
authored
[libclc] Optimize ceil/fabs/floor/rint/trunc (#119596)
These functions all map to the corresponding LLVM intrinsics, but the vector intrinsics weren't being generated. The intrinsic mapping from CLC vector function to vector intrinsic was working correctly, but the mapping from OpenCL builtin to CLC function was suboptimally recursively splitting vectors in halves. For example, with this change, `ceil(float16)` calls `llvm.ceil.v16f32` directly once optimizations are applied. Also, instead of generating LLVM intrinsics through `__asm`, we now call clang elementwise builtins for each CLC builtin. This should be a more standard way of achieving the same result. The CLC versions of each of these builtins are also now built and enabled for SPIR-V targets. The LLVM -> SPIR-V translator maps the intrinsics to the appropriate OpExtInst, so there should be no difference in semantics, despite the newly introduced indirection from OpenCL builtin through the CLC builtin to the intrinsic. The AMDGPU targets make use of the same `_CLC_DEFINE_UNARY_BUILTIN` macro to override `sqrt`, so those functions also appear more optimal with this change, calling the vector `llvm.sqrt.vXf32` intrinsics directly.
1 parent 3d6b2d4 commit 06789cc

24 files changed

+92
-66
lines changed

libclc/clc/include/clc/clcmacro.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,21 @@
191191

192192
#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
193193
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \
194-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
194+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
195+
return BUILTIN(x); \
196+
} \
197+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
198+
return BUILTIN(x); \
199+
} \
200+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
201+
return BUILTIN(x); \
202+
} \
203+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
204+
return BUILTIN(x); \
205+
} \
206+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
207+
return BUILTIN(x); \
208+
}
195209

196210
#ifdef cl_khr_fp16
197211

+4-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_CEIL_H__
22
#define __CLC_MATH_CLC_CEIL_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible ceil
6-
#define __clc_ceil ceil
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_ceil
11-
#define __CLC_INTRINSIC "llvm.ceil"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_CEIL_H__
+4-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_FABS_H__
22
#define __CLC_MATH_CLC_FABS_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible fabs
6-
#define __clc_fabs fabs
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_fabs
11-
#define __CLC_INTRINSIC "llvm.fabs"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_FABS_H__
+4-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_FLOOR_H__
22
#define __CLC_MATH_CLC_FLOOR_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible floor
6-
#define __clc_floor floor
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_floor
11-
#define __CLC_INTRINSIC "llvm.floor"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_FLOOR_H__
+4-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_RINT_H__
22
#define __CLC_MATH_CLC_RINT_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible rint
6-
#define __clc_rint rint
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_rint
11-
#define __CLC_INTRINSIC "llvm.rint"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_RINT_H__
+4-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_TRUNC_H__
22
#define __CLC_MATH_CLC_TRUNC_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible trunc
6-
#define __clc_trunc trunc
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_trunc
11-
#define __CLC_INTRINSIC "llvm.trunc"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_TRUNC_H__

libclc/clc/lib/clspv/SOURCES

+5-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
dummy.cl
1+
../generic/math/clc_ceil.cl
2+
../generic/math/clc_fabs.cl
3+
../generic/math/clc_floor.cl
4+
../generic/math/clc_rint.cl
5+
../generic/math/clc_trunc.cl

libclc/clc/lib/clspv/dummy.cl

-1
This file was deleted.

libclc/clc/lib/generic/SOURCES

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
geometric/clc_dot.cl
22
integer/clc_abs.cl
33
integer/clc_abs_diff.cl
4+
math/clc_ceil.cl
5+
math/clc_fabs.cl
6+
math/clc_floor.cl
7+
math/clc_rint.cl
8+
math/clc_trunc.cl
49
relational/clc_all.cl
510
relational/clc_any.cl
611
relational/clc_bitselect.cl
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_ceil
5+
#define __CLC_BUILTIN __builtin_elementwise_ceil
6+
#include <clc/math/unary_builtin.inc>
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_fabs
5+
#define __CLC_BUILTIN __builtin_elementwise_abs
6+
#include <clc/math/unary_builtin.inc>
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_floor
5+
#define __CLC_BUILTIN __builtin_elementwise_floor
6+
#include <clc/math/unary_builtin.inc>
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_rint
5+
#define __CLC_BUILTIN __builtin_elementwise_rint
6+
#include <clc/math/unary_builtin.inc>
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_trunc
5+
#define __CLC_BUILTIN __builtin_elementwise_trunc
6+
#include <clc/math/unary_builtin.inc>

libclc/clc/lib/spirv/SOURCES

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
../generic/geometric/clc_dot.cl
2-
2+
../generic/math/clc_ceil.cl
3+
../generic/math/clc_fabs.cl
4+
../generic/math/clc_floor.cl
5+
../generic/math/clc_rint.cl
6+
../generic/math/clc_trunc.cl

libclc/clc/lib/spirv64/SOURCES

+5
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
../generic/geometric/clc_dot.cl
2+
../generic/math/clc_ceil.cl
3+
../generic/math/clc_fabs.cl
4+
../generic/math/clc_floor.cl
5+
../generic/math/clc_rint.cl
6+
../generic/math/clc_trunc.cl

libclc/generic/lib/math/ceil.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
#undef __CLC_FUNCTION
66
#define __CLC_FUNCTION ceil
7-
#include "unary_builtin.inc"
7+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/fabs.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
#undef __CLC_FUNCTION
66
#define __CLC_FUNCTION fabs
7-
#include "unary_builtin.inc"
7+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/floor.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
#undef __CLC_FUNCTION
66
#define __CLC_FUNCTION floor
7-
#include "unary_builtin.inc"
7+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/rint.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
#undef __CLC_FUNCTION
55
#define __CLC_FUNCTION rint
6-
#include "unary_builtin.inc"
6+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/round.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77

88
#undef __CLC_FUNCTION
99
#define __CLC_FUNCTION round
10-
#include "unary_builtin.inc"
10+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/sqrt.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
#include "math/clc_sqrt.h"
2525

2626
#define __CLC_FUNCTION sqrt
27-
#include "unary_builtin.inc"
27+
#include <clc/math/unary_builtin.inc>

libclc/generic/lib/math/trunc.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
#undef __CLC_FUNCTION
55
#define __CLC_FUNCTION trunc
6-
#include "unary_builtin.inc"
6+
#include <clc/math/unary_builtin.inc>

0 commit comments

Comments
 (0)