Skip to content

Commit a1761c7

Browse files
authored
batch: suppress warning for SIMD loop vectorization failure with clang (#2032)
batch: relax SIMD loop for certain operations. When building with a newer version of clang, I found that a few more SIMD pragma loops fail to vectorize on clang. Since this seems to depend on the compiler version, and I don't want to disable attempted vectorization, I thought a better strategy was to disable the warning when vectorization fails in those specific places. Also add a "latest dependencies with clang" test, which is how I stumbled across these failures in the first place. Signed-off-by: Larry Gritz <[email protected]>
1 parent 1cfaf77 commit a1761c7

File tree

6 files changed

+83
-11
lines changed

6 files changed

+83
-11
lines changed

.github/workflows/ci.yml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,27 @@ jobs:
414414
- desc: latest releases gcc11/C++17 llvm17 oiio-rel exr3.2 py3.12 avx2 batch-b16avx512
415415
nametag: linux-latest-releases
416416
runner: ubuntu-24.04
417-
cc_compiler: gcc-13
418-
cxx_compiler: g++-13
417+
cc_compiler: gcc-14
418+
cxx_compiler: g++-14
419+
cxx_std: 17
420+
fmt_ver: 12.1.0
421+
opencolorio_ver: v2.5.0
422+
openexr_ver: v3.4.2
423+
openimageio_ver: release
424+
pybind11_ver: v3.0.1
425+
python_ver: "3.12"
426+
llvm_action_ver: "18.1.7"
427+
simd: avx2,f16c
428+
batched: b8_AVX2,b8_AVX512,b16_AVX512
429+
setenvs: export LIBTIFF_VERSION=v4.7.1
430+
PTEX_VERSION=v2.4.3
431+
PUGIXML_VERSION=v1.15
432+
FREETYPE_VERSION=VER-2-14-3
433+
- desc: latest releases clang18/C++17 llvm18 oiio-rel exr3.4 py3.12 avx2 batch-b16avx512
434+
nametag: linux-latest-releases
435+
runner: ubuntu-24.04
436+
cc_compiler: clang
437+
cxx_compiler: clang++
419438
cxx_std: 17
420439
fmt_ver: 11.1.4
421440
opencolorio_ver: v2.4.2

src/include/OSL/platform.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,21 @@
200200
#endif
201201

202202
// Compiler-specific pragmas
203+
//
204+
// - OSL_PRAGMA_WARNING_PUSH/POP pushes/pops warning options (for all
205+
// compilers).
206+
// - OSL_PRAGMA_VISIBILITY_PUSH/POP pushes/pops symbol visibility options (for
207+
// all compilers that support it).
208+
// - OSL_GCC_PRAGMA makes a pragma for all gcc-like compilers, but does nothing
209+
// for MSVS.
210+
// - OSL_GCC_ONLY_PRAGMA makes a pragma for real gcc only.
211+
// - OSL_CLANG_PRAGMA makes a pragma for all clang-based compilers (including
212+
// Apple clang and Intel LLVM).
213+
// - OSL_NONINTEL_CLANG_PRAGMA makes a pragma for regular clang and Apple
214+
// clang, but not Intel clang.
215+
// - OSL_INTEL_CLASSIC_PRAGMA makes a pragma for icc only.
216+
// - OSL_INTEL_LLVM_PRAGMA makes a pragma for icx only.
217+
// - OSL_MSVS_PRAGMA makes a pragma for MSVS only.
203218
#if defined(__GNUC__) /* gcc, clang, icc */
204219
# define OSL_PRAGMA_WARNING_PUSH OSL_PRAGMA(GCC diagnostic push)
205220
# define OSL_PRAGMA_WARNING_POP OSL_PRAGMA(GCC diagnostic pop)
@@ -223,6 +238,11 @@
223238
# else
224239
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
225240
# endif
241+
# if defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER)
242+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma)
243+
# else
244+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
245+
# endif
226246
# define OSL_MSVS_PRAGMA(UnQuotedPragma)
227247
#elif defined(_MSC_VER)
228248
# define OSL_PRAGMA_WARNING_PUSH __pragma(warning(push))
@@ -232,6 +252,7 @@
232252
# define OSL_GCC_PRAGMA(UnQuotedPragma)
233253
# define OSL_GCC_ONLY_PRAGMA(UnQuotedPragma)
234254
# define OSL_CLANG_PRAGMA(UnQuotedPragma)
255+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
235256
# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma)
236257
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
237258
# define OSL_MSVS_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma)
@@ -243,6 +264,7 @@
243264
# define OSL_GCC_PRAGMA(UnQuotedPragma)
244265
# define OSL_GCC_ONLY_PRAGMA(UnQuotedPragma)
245266
# define OSL_CLANG_PRAGMA(UnQuotedPragma)
267+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
246268
# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma)
247269
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
248270
# define OSL_MSVS_PRAGMA(UnQuotedPragma)
@@ -291,6 +313,7 @@
291313
#define OSL_OMP_SIMD_LOOP(...) OSL_OMP_PRAGMA(omp simd __VA_ARGS__)
292314

293315
#if (OSL_GNUC_VERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_INTEL_LLVM_COMPILER_VERSION)
316+
// GCC, icc, icx: Use a simd loop for sure
294317
# define OSL_OMP_COMPLEX_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
295318
#else
296319
// Ignore requests to vectorize complex/nested SIMD loops for certain

src/liboslexec/wide/wide_opalgebraic.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ calculatenormal(const Dual2<Vec3>& tmpP, bool flipHandedness)
128128

129129

130130

131+
OSL_PRAGMA_WARNING_PUSH
132+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
133+
131134
OSL_BATCHOP void
132135
__OSL_OP2(length, Wf, Wv)(void* r_, void* V_)
133136
{
@@ -136,7 +139,7 @@ __OSL_OP2(length, Wf, Wv)(void* r_, void* V_)
136139
Wide<const Vec3> wV(V_);
137140
Wide<float> wr(r_);
138141

139-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
142+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
140143
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
141144
Vec3 V = wV[lane];
142145
float r = sfm::length(V);
@@ -155,7 +158,7 @@ __OSL_MASKED_OP2(length, Wf, Wv)(void* r_, void* V_, unsigned int mask_value)
155158
Wide<const Vec3> wV(V_);
156159
Masked<float> wr(r_, Mask(mask_value));
157160

158-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
161+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
159162
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
160163
Vec3 V = wV[lane];
161164
if (wr.mask()[lane]) {
@@ -166,6 +169,8 @@ __OSL_MASKED_OP2(length, Wf, Wv)(void* r_, void* V_, unsigned int mask_value)
166169
}
167170
}
168171

172+
OSL_PRAGMA_WARNING_POP
173+
169174

170175

171176
OSL_BATCHOP void
@@ -208,6 +213,9 @@ __OSL_MASKED_OP2(length, Wdf, Wdv)(void* r_, void* V_, unsigned int mask_value)
208213

209214

210215

216+
OSL_PRAGMA_WARNING_PUSH
217+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
218+
211219
OSL_BATCHOP void
212220
__OSL_OP2(area, Wf, Wdv)(void* r_, void* DP_)
213221
{
@@ -217,7 +225,7 @@ __OSL_OP2(area, Wf, Wdv)(void* r_, void* DP_)
217225

218226
Wide<float> wr(r_);
219227

220-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
228+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
221229
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
222230
Dual2<Vec3> DP = wDP[lane];
223231

@@ -240,7 +248,7 @@ __OSL_MASKED_OP2(area, Wf, Wdv)(void* r_, void* DP_, unsigned int mask_value)
240248

241249
Masked<float> wr(r_, Mask(mask_value));
242250

243-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
251+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
244252
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
245253
Dual2<Vec3> DP = wDP[lane];
246254
if (wr.mask()[lane]) {
@@ -253,6 +261,8 @@ __OSL_MASKED_OP2(area, Wf, Wdv)(void* r_, void* DP_, unsigned int mask_value)
253261
}
254262
}
255263

264+
OSL_PRAGMA_WARNING_POP
265+
256266

257267

258268
OSL_BATCHOP void
@@ -447,6 +457,9 @@ __OSL_MASKED_OP3(distance, Wdf, Wdv, Wdv)(void* r_, void* a_, void* b_,
447457

448458

449459

460+
OSL_PRAGMA_WARNING_PUSH
461+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
462+
450463
OSL_BATCHOP void
451464
__OSL_OP2(normalize, Wv, Wv)(void* r_, void* V_)
452465
{
@@ -455,7 +468,7 @@ __OSL_OP2(normalize, Wv, Wv)(void* r_, void* V_)
455468
Wide<const Vec3> wV(V_);
456469
Wide<Vec3> wr(r_);
457470

458-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
471+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
459472
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
460473
Vec3 V = wV[lane];
461474
Vec3 N = sfm::normalize(V);
@@ -473,7 +486,7 @@ __OSL_MASKED_OP2(normalize, Wv, Wv)(void* r_, void* V_, unsigned int mask_value)
473486
Wide<const Vec3> wV(V_);
474487
Masked<Vec3> wr(r_, Mask(mask_value));
475488

476-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
489+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
477490
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
478491
Vec3 V = wV[lane];
479492
if (wr.mask()[lane]) {
@@ -484,6 +497,7 @@ __OSL_MASKED_OP2(normalize, Wv, Wv)(void* r_, void* V_, unsigned int mask_value)
484497
}
485498
}
486499

500+
OSL_PRAGMA_WARNING_POP
487501

488502

489503
OSL_BATCHOP void

src/liboslexec/wide/wide_opcolor.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ __OSL_OP(blackbody_vf)(void* bsg_, void* out, float temp)
5656

5757

5858

59+
OSL_PRAGMA_WARNING_PUSH
60+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
61+
5962
OSL_BATCHOP void
6063
__OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
6164
unsigned int mask_value)
@@ -68,7 +71,7 @@ __OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
6871
Block<int> computeRequiredBlock;
6972
Wide<int> wcomputeRequired(computeRequiredBlock);
7073

71-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
74+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
7275
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
7376
float temperature = wL[lane];
7477
bool canNotLookup = !cs.can_lookup_blackbody(temperature);
@@ -105,6 +108,8 @@ __OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
105108
}
106109
}
107110

111+
OSL_PRAGMA_WARNING_POP
112+
108113

109114

110115
OSL_BATCHOP void

src/liboslexec/wide/wide_opspline.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ splineinverse_search(const MatrixT& M, R_T& result, X_T& xval, KArrayT knots,
362362

363363
namespace { // unnamed
364364

365+
OSL_PRAGMA_WARNING_PUSH
366+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
367+
365368
template<bool IsBasisUConstantT, int BasisStepT, typename MatrixT,
366369
typename RAccessorT, typename XAccessorT, typename KAccessorT>
367370
OSL_FORCEINLINE void
@@ -376,7 +379,7 @@ spline_evaluate_loop_over_wide(const MatrixT& M, RAccessorT wR, XAccessorT wX,
376379

377380
OSL_FORCEINLINE_BLOCK
378381
{
379-
OSL_OMP_PRAGMA(omp simd simdlen(vec_width))
382+
OSL_OMP_SIMD_LOOP(simdlen(vec_width))
380383
for (int lane = 0; lane < vec_width; ++lane) {
381384
X_Type x = wX[lane];
382385
auto knots = wK[lane];
@@ -547,6 +550,8 @@ splineinverse_evaluate_wide(RAccessorT wR, ustring spline_basis, XAccessorT wX,
547550
impl_by_basis[basis_type](wR, wX, wK, knot_count);
548551
}
549552

553+
OSL_PRAGMA_WARNING_POP
554+
550555
} // namespace
551556

552557

src/liboslexec/wide/wide_opstring.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ __OSL_MASKED_OP2(strlen, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
9090
}
9191

9292

93+
94+
OSL_PRAGMA_WARNING_PUSH
95+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
96+
9397
OSL_BATCHOP void
9498
__OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
9599
{
@@ -98,7 +102,7 @@ __OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
98102

99103
OSL_FORCEINLINE_BLOCK
100104
{
101-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
105+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
102106
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
103107
ustring s = wS[lane];
104108
if (wR.mask()[lane]) {
@@ -108,6 +112,8 @@ __OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
108112
}
109113
}
110114

115+
OSL_PRAGMA_WARNING_POP
116+
111117

112118

113119
OSL_BATCHOP void

0 commit comments

Comments
 (0)