Skip to content

Commit e42590e

Browse files
[SYCL] Cherry-pick "Switch to use plain array in sycl::vec in more cases" to sycl-rel-6_0_0 (#17697)
This is a cherry-pick of #17656 + changes required to resolve merge conflicts. -------------------------------------- The problem with using std::array in sycl::vec is that we cannot compile it in some environments (namely, Windows) because the former may use something that is illegal in SYCL device code. #17025 fixed that, but only did so under preview breaking changes mode, which does not satisfy some of our customers immediately. This PR introduces two main changes: it allows to opt-in for new behavior through passing -D__SYCL_USE_NEW_VEC_IMPL=1 macro without using -fpreview-breaking-changes flag. That allows for a more gradual opt-in from customers who are interested in this fix it switches the imlpementation to use the new approach with C-style arrays if their size and alignment is the same as for the corresponding std::array - in that case their memory layout is expected to be absolutely the same and therefore it should be safe to use the new approach without fear of some ABI incompatibilities. This allows for customers to benefit from the fix without specifying any extra macro (which should be the case for the most common platforms out there) --------- Co-authored-by: Alexey Sachkov <[email protected]>
1 parent 54cbbec commit e42590e

File tree

7 files changed

+164
-61
lines changed

7 files changed

+164
-61
lines changed

Diff for: sycl/include/sycl/vector.hpp

+42-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@
2222
#endif
2323
#endif // __clang__
2424

25+
// See vec::DataType definitions for more details
26+
#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
27+
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
28+
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1
29+
#else
30+
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0
31+
#endif
32+
#endif
33+
2534
#if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
2635
#error "SYCL device compiler is built without ext_vector_type support"
2736
#endif
@@ -84,6 +93,9 @@ struct elem {
8493
};
8594

8695
namespace detail {
96+
// To be defined in tests, trick to access vec's private methods
97+
template <typename T1, int T2> class vec_base_test;
98+
8799
template <typename VecT, typename OperationLeftT, typename OperationRightT,
88100
template <typename> class OperationCurrentT, int... Indexes>
89101
class SwizzleOp;
@@ -142,7 +154,34 @@ class __SYCL_EBO vec
142154

143155
// This represent type of underlying value. There should be only one field
144156
// in the class, so vec<float, 16> should be equal to float16 in memory.
145-
using DataType = std::array<DataT, AdjustedNum>;
157+
//
158+
// In intel/llvm#14130 we incorrectly used std::array as an underlying storage
159+
// for vec data. The problem with std::array is that it comes from the C++
160+
// STL headers which we do not control and they may use something that is
161+
// illegal in SYCL device code. One of specific examples is use of debug
162+
// assertions in MSVC's STL implementation.
163+
//
164+
// The better approach is to use plain C++ array, but the problem here is that
165+
// C++ specification does not provide any guarantees about the memory layout
166+
// of std::array and therefore directly switching to it would technically be
167+
// an ABI-break, even though the practical chances of encountering the issue
168+
// are low.
169+
//
170+
// To play it safe, we only switch to use plain array if both its size and
171+
// alignment match those of std::array, or unless the new behavior is forced
172+
// via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode.
173+
using DataType = std::conditional_t<
174+
#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
175+
true,
176+
#else
177+
sizeof(std::array<DataT, AdjustedNum>) == sizeof(DataT[AdjustedNum]) &&
178+
alignof(std::array<DataT, AdjustedNum>) ==
179+
alignof(DataT[AdjustedNum]),
180+
#endif
181+
DataT[AdjustedNum], std::array<DataT, AdjustedNum>>;
182+
183+
// To allow testing of private methods
184+
template <typename T1, int T2> friend class detail::vec_base_test;
146185

147186
#ifdef __SYCL_DEVICE_ONLY__
148187
using element_type_for_vector_t = typename detail::map_type<
@@ -387,7 +426,7 @@ class __SYCL_EBO vec
387426
typename vector_t_ = vector_t,
388427
typename = typename std::enable_if_t<std::is_same_v<vector_t_, vector_t>>>
389428
constexpr vec(vector_t_ openclVector) {
390-
m_Data = sycl::bit_cast<DataType>(openclVector);
429+
sycl::detail::memcpy(&m_Data, &openclVector, sizeof(openclVector));
391430
}
392431

393432
/* @SYCL2020
@@ -501,7 +540,7 @@ class __SYCL_EBO vec
501540
if constexpr (canUseNativeVectorConvert) {
502541
auto val = detail::convertImpl<T, R, roundingMode, NumElements, OpenCLVecT,
503542
OpenCLVecR>(NativeVector);
504-
Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
543+
sycl::detail::memcpy(&Result.m_Data, &val, sizeof(Result));
505544
} else
506545
#endif // __SYCL_DEVICE_ONLY__
507546
{

Diff for: sycl/test/abi/layout_vec.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec<int, 4>) {}
1212

1313
// CHECK: 0 | class sycl::vec<int, 4>
1414
// ignore empty base classes
15-
// CHECK: 0 | struct std::array<int, 4> m_Data
16-
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
15+
// CHECK: 0 | DataType m_Data
1716
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
1817
// CHECK-NEXT: | nvsize=16, nvalign=16]
1918

@@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec<bool, 16>) {}
2322

2423
// CHECK: 0 | class sycl::vec<_Bool, 16>
2524
// ignore empty base classes
26-
// CHECK: 0 | struct std::array<_Bool, 16> m_Data
27-
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
25+
// CHECK: 0 | DataType m_Data
2826
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
2927
// CHECK-NEXT: | nvsize=16, nvalign=16]

Diff for: sycl/test/basic_tests/vectors/storage.cpp

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only
2+
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes
3+
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1
4+
// expected-no-diagnostics
5+
6+
#include <sycl/vector.hpp>
7+
8+
#include <type_traits>
9+
10+
namespace sycl {
11+
namespace detail {
12+
template <typename T, int N> class vec_base_test {
13+
public:
14+
static void do_check() {
15+
constexpr bool uses_std_array =
16+
std::is_same_v<typename sycl::vec<T, N>::DataType, std::array<T, N>>;
17+
constexpr bool uses_plain_array =
18+
std::is_same_v<typename sycl::vec<T, N>::DataType, T[N]>;
19+
20+
constexpr bool std_array_and_plain_array_have_the_same_layout =
21+
sizeof(std::array<T, N>) == sizeof(T[N]) &&
22+
alignof(std::array<T, N>) == alignof(T[N]);
23+
24+
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \
25+
__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
26+
static_assert(uses_plain_array,
27+
"We must use plain array regardless of "
28+
"layout, because user is opted-in for a potential ABI-break");
29+
#else
30+
static_assert(std_array_and_plain_array_have_the_same_layout ==
31+
uses_plain_array,
32+
"If layouts are the same, we must use safer plain array "
33+
"instead of std::array, or vice versa");
34+
static_assert(
35+
!std_array_and_plain_array_have_the_same_layout == uses_std_array,
36+
"If layouts are not the same, we must use std::array to preserve ABI");
37+
#endif
38+
}
39+
};
40+
} // namespace detail
41+
} // namespace sycl
42+
43+
int main() { sycl::detail::vec_base_test<int, 4>::do_check(); }

Diff for: sycl/test/check_device_code/vector/vector_bf16_builtins.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
6868
}
6969

7070
// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
71-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
71+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
7272
// CHECK-NEXT: entry:
7373
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
7474
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -122,7 +122,7 @@ SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
122122
}
123123

124124
// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
125-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
125+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
126126
// CHECK-NEXT: entry:
127127
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
128128
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
@@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
148148
}
149149

150150
// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
151-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
151+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
152152
// CHECK-NEXT: entry:
153153
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
154154
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -184,7 +184,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
184184
}
185185

186186
// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
187-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
187+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
188188
// CHECK-NEXT: entry:
189189
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
190190
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -220,7 +220,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
220220
}
221221

222222
// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
223-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
223+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
224224
// CHECK-NEXT: entry:
225225
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
226226
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2

Diff for: sycl/test/check_device_code/vector/vector_convert_bfloat.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec<bfloat16, 3> &inp) {
6262
}
6363

6464
// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE(
65-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
65+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
6666
// CHECK-NEXT: entry:
6767
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
6868
// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]]
@@ -92,7 +92,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
9292
}
9393

9494
// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
95-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
95+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
9696
// CHECK-NEXT: entry:
9797
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
9898
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]]
@@ -190,7 +190,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
190190
}
191191

192192
// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
193-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
193+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
194194
// CHECK-NEXT: entry:
195195
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
196196
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]]
@@ -203,7 +203,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
203203
}
204204

205205
// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
206-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
206+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
207207
// CHECK-NEXT: entry:
208208
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
209209
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]

0 commit comments

Comments
 (0)