|
36 | 36 | TypeName& operator=(TypeName&&) = default
|
37 | 37 | #endif
|
38 | 38 |
|
39 |
| -#define ARROW_UNUSED(x) (void)(x) |
40 |
| -#define ARROW_ARG_UNUSED(x) |
| 39 | +// With ARROW_PREDICT_FALSE, GCC and clang can be told that a certain branch is |
| 40 | +// not likely to be taken (for instance, a CHECK failure), and use that information in |
| 41 | +// static analysis. Giving the compiler this information can affect the generated code |
| 42 | +// layout in the absence of better information (i.e. -fprofile-arcs). [1] explains how |
| 43 | +// this feature can be used to improve code generation. It was written as a positive |
| 44 | +// comment to a negative article about the use of these annotations. |
41 | 45 | //
|
42 |
| -// GCC can be told that a certain branch is not likely to be taken (for |
43 |
| -// instance, a CHECK failure), and use that information in static analysis. |
44 |
| -// Giving it this information can help it optimize for the common case in |
45 |
| -// the absence of better information (ie. -fprofile-arcs). |
| 46 | +// ARROW_COMPILER_ASSUME allows the compiler to assume that a given expression is |
| 47 | +// true, without evaluating it, and to optimise based on this assumption [2]. If this |
| 48 | +// condition is violated at runtime, the behavior is undefined. This can be useful to |
| 49 | +// generate both faster and smaller code in compute kernels. |
46 | 50 | //
|
47 |
| -#if defined(__GNUC__) |
48 |
| -#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) |
49 |
| -#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
| 51 | +// IMPORTANT: Different optimisers are likely to react differently to this annotation! |
| 52 | +// It should be used with care when we can prove by some means that the assumption |
| 53 | +// is (1) guaranteed to always hold and (2) is useful for optimization [3]. If the |
| 54 | +// assumption is pessimistic, it might even block the compiler from decisions that |
| 55 | +// could lead to better code [4]. If you have a good intuition for what the compiler |
| 56 | +// can do with assumptions [5], you can use this macro to guide it and end up with |
| 57 | +// results you would only get with more complex code transformations. |
| 58 | +// `clang -S -emit-llvm` can be used to check how the generated code changes with |
| 59 | +// your specific use of this macro. |
| 60 | +// |
| 61 | +// [1] https://lobste.rs/s/uwgtkt/don_t_use_likely_unlikely_attributes#c_xi3wmc |
| 62 | +// [2] "Portable assumptions" |
| 63 | +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1774r4.pdf |
| 64 | +// [3] "Assertions Are Pessimistic, Assumptions Are Optimistic" |
| 65 | +// https://blog.regehr.org/archives/1096 |
| 66 | +// [4] https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 |
| 67 | +// [5] J. Doerfert et al. 2019. "Performance Exploration Through Optimistic Static |
| 68 | +// Program Annotations". https://github.com/jdoerfert/PETOSPA/blob/master/ISC19.pdf |
| 69 | +#define ARROW_UNUSED(x) (void)(x) |
| 70 | +#define ARROW_ARG_UNUSED(x) |
| 71 | +#if defined(__GNUC__) // GCC and compatible compilers (clang, Intel ICC) |
50 | 72 | #define ARROW_NORETURN __attribute__((noreturn))
|
51 | 73 | #define ARROW_NOINLINE __attribute__((noinline))
|
52 | 74 | #define ARROW_FORCE_INLINE __attribute__((always_inline))
|
| 75 | +#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) |
| 76 | +#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
53 | 77 | #define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
|
54 |
| -#elif defined(_MSC_VER) |
| 78 | +#define ARROW_RESTRICT __restrict |
| 79 | +#if defined(__clang__) // clang-specific |
| 80 | +#define ARROW_COMPILER_ASSUME(expr) __builtin_assume(expr) |
| 81 | +#else // GCC-specific |
| 82 | +#if __GNUC__ >= 13 |
| 83 | +#define ARROW_COMPILER_ASSUME(expr) __attribute__((assume(expr))) |
| 84 | +#else |
// GCC does not have a built-in assume intrinsic before GCC 13, so we use an
// if statement and __builtin_unreachable() to achieve the same effect [2].
// Unlike clang's __builtin_assume and C++23's [[assume(expr)]], using this
// on GCC won't warn about side-effects in the expression, so make sure expr
// is side-effect free when working with GCC versions before 13 (Jan-2024),
// otherwise clang/MSVC builds will fail in CI.
//
// The expansion is wrapped in do { ... } while (0) so that it behaves as a
// single statement: `ARROW_COMPILER_ASSUME(x);` can be used as the unbraced
// body of an if/else without capturing a following `else` or leaving a stray
// `;` behind. The argument is parenthesised so that any expression can be
// passed safely.
#define ARROW_COMPILER_ASSUME(expr) \
  do {                              \
    if (!(expr)) {                  \
      __builtin_unreachable();      \
    }                               \
  } while (0)
| 96 | +#endif // __GNUC__ >= 13 |
| 97 | +#endif |
| 98 | +#elif defined(_MSC_VER) // MSVC |
55 | 99 | #define ARROW_NORETURN __declspec(noreturn)
|
56 | 100 | #define ARROW_NOINLINE __declspec(noinline)
|
57 | 101 | #define ARROW_FORCE_INLINE __declspec(forceinline)
|
58 | 102 | #define ARROW_PREDICT_FALSE(x) (x)
|
59 | 103 | #define ARROW_PREDICT_TRUE(x) (x)
|
60 | 104 | #define ARROW_PREFETCH(addr)
|
| 105 | +#define ARROW_RESTRICT __restrict |
| 106 | +#define ARROW_COMPILER_ASSUME(expr) __assume(expr) |
61 | 107 | #else
|
62 | 108 | #define ARROW_NORETURN
|
63 | 109 | #define ARROW_NOINLINE
|
64 | 110 | #define ARROW_FORCE_INLINE
|
65 | 111 | #define ARROW_PREDICT_FALSE(x) (x)
|
66 | 112 | #define ARROW_PREDICT_TRUE(x) (x)
|
67 | 113 | #define ARROW_PREFETCH(addr)
|
68 |
| -#endif |
69 |
| - |
70 |
| -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) |
71 |
| -#define ARROW_RESTRICT __restrict |
72 |
| -#else |
73 | 114 | #define ARROW_RESTRICT
|
| 115 | +#define ARROW_COMPILER_ASSUME(expr) |
74 | 116 | #endif
|
75 | 117 |
|
76 | 118 | // ----------------------------------------------------------------------
|
|
0 commit comments