Skip to content

[SYCL] Cherry-pick "Switch to use plain array in sycl::vec in more cases" to sycl-rel-6_0_0 #17697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@
#endif
#endif // __clang__

// See vec::DataType definitions for more details
#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1
#else
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0
#endif
#endif

#if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
#error "SYCL device compiler is built without ext_vector_type support"
#endif
Expand Down Expand Up @@ -84,6 +93,9 @@ struct elem {
};

namespace detail {
// To be defined in tests, trick to access vec's private methods
template <typename T1, int T2> class vec_base_test;

template <typename VecT, typename OperationLeftT, typename OperationRightT,
template <typename> class OperationCurrentT, int... Indexes>
class SwizzleOp;
Expand Down Expand Up @@ -142,7 +154,34 @@ class __SYCL_EBO vec

// This represent type of underlying value. There should be only one field
// in the class, so vec<float, 16> should be equal to float16 in memory.
using DataType = std::array<DataT, AdjustedNum>;
//
// In intel/llvm#14130 we incorrectly used std::array as an underlying storage
// for vec data. The problem with std::array is that it comes from the C++
// STL headers which we do not control and they may use something that is
// illegal in SYCL device code. One of specific examples is use of debug
// assertions in MSVC's STL implementation.
//
// The better approach is to use plain C++ array, but the problem here is that
// C++ specification does not provide any guarantees about the memory layout
// of std::array and therefore directly switching to it would technically be
// an ABI-break, even though the practical chances of encountering the issue
// are low.
//
// To play it safe, we only switch to use plain array if both its size and
// alignment match those of std::array, or unless the new behavior is forced
// via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode.
using DataType = std::conditional_t<
#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
true,
#else
sizeof(std::array<DataT, AdjustedNum>) == sizeof(DataT[AdjustedNum]) &&
alignof(std::array<DataT, AdjustedNum>) ==
alignof(DataT[AdjustedNum]),
#endif
DataT[AdjustedNum], std::array<DataT, AdjustedNum>>;

// To allow testing of private methods
template <typename T1, int T2> friend class detail::vec_base_test;

#ifdef __SYCL_DEVICE_ONLY__
using element_type_for_vector_t = typename detail::map_type<
Expand Down Expand Up @@ -387,7 +426,7 @@ class __SYCL_EBO vec
typename vector_t_ = vector_t,
typename = typename std::enable_if_t<std::is_same_v<vector_t_, vector_t>>>
constexpr vec(vector_t_ openclVector) {
m_Data = sycl::bit_cast<DataType>(openclVector);
sycl::detail::memcpy(&m_Data, &openclVector, sizeof(openclVector));
}

/* @SYCL2020
Expand Down Expand Up @@ -501,7 +540,7 @@ class __SYCL_EBO vec
if constexpr (canUseNativeVectorConvert) {
auto val = detail::convertImpl<T, R, roundingMode, NumElements, OpenCLVecT,
OpenCLVecR>(NativeVector);
Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
sycl::detail::memcpy(&Result.m_Data, &val, sizeof(Result));
} else
#endif // __SYCL_DEVICE_ONLY__
{
Expand Down
6 changes: 2 additions & 4 deletions sycl/test/abi/layout_vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec<int, 4>) {}

// CHECK: 0 | class sycl::vec<int, 4>
// ignore empty base classes
// CHECK: 0 | struct std::array<int, 4> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]

Expand All @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec<bool, 16>) {}

// CHECK: 0 | class sycl::vec<_Bool, 16>
// ignore empty base classes
// CHECK: 0 | struct std::array<_Bool, 16> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]
43 changes: 43 additions & 0 deletions sycl/test/basic_tests/vectors/storage.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1
// expected-no-diagnostics

#include <sycl/vector.hpp>

#include <type_traits>

namespace sycl {
namespace detail {
template <typename T, int N> class vec_base_test {
public:
static void do_check() {
constexpr bool uses_std_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, std::array<T, N>>;
constexpr bool uses_plain_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, T[N]>;

constexpr bool std_array_and_plain_array_have_the_same_layout =
sizeof(std::array<T, N>) == sizeof(T[N]) &&
alignof(std::array<T, N>) == alignof(T[N]);

#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \
__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
static_assert(uses_plain_array,
"We must use plain array regardless of "
"layout, because user is opted-in for a potential ABI-break");
#else
static_assert(std_array_and_plain_array_have_the_same_layout ==
uses_plain_array,
"If layouts are the same, we must use safer plain array "
"instead of std::array, or vice versa");
static_assert(
!std_array_and_plain_array_have_the_same_layout == uses_std_array,
"If layouts are not the same, we must use std::array to preserve ABI");
#endif
}
};
} // namespace detail
} // namespace sycl

int main() { sycl::detail::vec_base_test<int, 4>::do_check(); }
10 changes: 5 additions & 5 deletions sycl/test/check_device_code/vector/vector_bf16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -122,7 +122,7 @@ SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
Expand All @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -184,7 +184,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -220,7 +220,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]]
Expand Down Expand Up @@ -92,7 +92,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]]
Expand Down Expand Up @@ -190,7 +190,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]]
Expand All @@ -203,7 +203,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]
Expand Down
Loading
Loading