From b7d51413a617798e98846903ac0aa49463000b01 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 27 Mar 2025 19:50:16 +0100 Subject: [PATCH 1/2] [SYCL] Switch to use plain array in `sycl::vec` in more cases The problem with using `std::array` in `sycl::vec` is that we cannot compile it in some environments (namely, Windows) because the former may use something that is illegal in SYCL device code. intel/llvm#17025 fixed that, but only did so under preview breaking changes mode, which does not satisfy some of our customers immediately. This PR introduces two main changes: - it allows to opt-in for new behavior through passing `-D__SYCL_USE_NEW_VEC_IMPL=1` macro without using `-fpreview-breaking-changes` flag. That allows for a more gradual opt-in from customers who are interested in this fix - it switches the imlpementation to use the new approach with C-style arrays if their size and alignment is the same as for the corresponding `std::array` - in that case their memory layout is expected to be absolutely the same and therefore it should be safe to use the new approach without fear of some ABI incompatibilities. This allows for customers to benefit from the fix without specifying any extra macro (which should be the case for the most common platforms out there) This is a cherry-pick of intel/llvm#17656 --- .../detail/type_traits/vec_marray_traits.hpp | 134 ++++++++++++++++++ sycl/include/sycl/vector.hpp | 41 +++++- sycl/test/abi/layout_vec.cpp | 6 +- sycl/test/basic_tests/vectors/storage.cpp | 43 ++++++ sycl/test/regression/vec_array_windows.cpp | 23 +++ 5 files changed, 242 insertions(+), 5 deletions(-) create mode 100644 sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp create mode 100644 sycl/test/basic_tests/vectors/storage.cpp create mode 100644 sycl/test/regression/vec_array_windows.cpp diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp new file mode 100644 index 0000000000000..6ce39bf6a072a --- /dev/null +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -0,0 +1,134 @@ +//==---------- Forward declarations and traits for vector/marray types -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +template class __SYCL_EBO vec; + +template class marray; + +namespace detail { +template class OperationCurrentT, int... Indexes> +class SwizzleOp; + +// Utility for converting a swizzle to a vector or preserve the type if it isn't +// a swizzle. +template struct simplify_if_swizzle { + using type = T; +}; + +template class OperationCurrentT, int... Indexes> +struct simplify_if_swizzle> { + using type = vec; +}; + +template +using simplify_if_swizzle_t = typename simplify_if_swizzle::type; + +// --------- is_* traits ------------------ // +template struct is_vec : std::false_type {}; +template struct is_vec> : std::true_type {}; +template constexpr bool is_vec_v = is_vec::value; + +template +struct is_ext_vector : std::false_type {}; +template +struct is_valid_type_for_ext_vector : std::false_type {}; +#if defined(__has_extension) +#if __has_extension(attribute_ext_vector_type) +template +using ext_vector = T __attribute__((ext_vector_type(N))); +template +struct is_ext_vector> : std::true_type {}; +template +struct is_valid_type_for_ext_vector>> + : std::true_type {}; +#endif +#endif +template +inline constexpr bool is_ext_vector_v = is_ext_vector::value; +template +inline constexpr bool is_valid_type_for_ext_vector_v = + is_valid_type_for_ext_vector::value; + +template struct is_swizzle : std::false_type {}; +template class OperationCurrentT, int... Indexes> +struct is_swizzle> : std::true_type {}; +template constexpr bool is_swizzle_v = is_swizzle::value; + +template +constexpr bool is_vec_or_swizzle_v = is_vec_v || is_swizzle_v; + +template struct is_marray : std::false_type {}; +template +struct is_marray> : std::true_type {}; +template constexpr bool is_marray_v = is_marray::value; + +// --------- num_elements trait ------------------ // +template +struct num_elements : std::integral_constant {}; +template +struct num_elements> : std::integral_constant {}; +template +struct num_elements> + : std::integral_constant {}; +#if defined(__has_extension) +#if __has_extension(attribute_ext_vector_type) +template +struct num_elements + : std::integral_constant {}; +#endif +#endif +template class OperationCurrentT, int... Indexes> +struct num_elements> + : std::integral_constant {}; + +template +inline constexpr std::size_t num_elements_v = num_elements::value; + +// --------- element_type trait ------------------ // +template struct element_type { + using type = T; +}; +template struct element_type> { + using type = T; +}; +template struct element_type> { + using type = T; +}; +#if defined(__has_extension) +#if __has_extension(attribute_ext_vector_type) +template +struct element_type { + using type = T; +}; +#endif +#endif +template using element_type_t = typename element_type::type; + +template +inline constexpr bool is_allowed_vec_size_v = + N == 1 || N == 2 || N == 3 || N == 4 || N == 8 || N == 16; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index ea935032ba445..1d7e2cdd44926 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -22,6 +22,15 @@ #endif #endif // __clang__ +// See vec::DataType definitions for more details +#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1 +#else +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0 +#endif +#endif + #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__) #error "SYCL device compiler is built without ext_vector_type support" #endif @@ -84,6 +93,9 @@ struct elem { }; namespace detail { +// To be defined in tests, trick to access vec's private methods +template class vec_base_test; + template class OperationCurrentT, int... Indexes> class SwizzleOp; @@ -142,7 +154,34 @@ class __SYCL_EBO vec // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. - using DataType = std::array; + // + // In intel/llvm#14130 we incorrectly used std::array as an underlying storage + // for vec data. The problem with std::array is that it comes from the C++ + // STL headers which we do not control and they may use something that is + // illegal in SYCL device code. One of specific examples is use of debug + // assertions in MSVC's STL implementation. + // + // The better approach is to use plain C++ array, but the problem here is that + // C++ specification does not provide any guarantees about the memory layout + // of std::array and therefore directly switching to it would technically be + // an ABI-break, even though the practical chances of encountering the issue + // are low. + // + // To play it safe, we only switch to use plain array if both its size and + // alignment match those of std::array, or unless the new behavior is forced + // via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode. + using DataType = std::conditional_t< +#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE + true, +#else + sizeof(std::array) == sizeof(DataT[AdjustedNum]) && + alignof(std::array) == + alignof(DataT[AdjustedNum]), +#endif + DataT[AdjustedNum], std::array>; + + // To allow testing of private methods + template friend class detail::vec_base_test; #ifdef __SYCL_DEVICE_ONLY__ using element_type_for_vector_t = typename detail::map_type< diff --git a/sycl/test/abi/layout_vec.cpp b/sycl/test/abi/layout_vec.cpp index 1f61d0fcd4666..06a9a7959a530 100644 --- a/sycl/test/abi/layout_vec.cpp +++ b/sycl/test/abi/layout_vec.cpp @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec // ignore empty base classes -// CHECK: 0 | struct std::array m_Data -// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec<_Bool, 16> // ignore empty base classes -// CHECK: 0 | struct std::array<_Bool, 16> m_Data -// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] diff --git a/sycl/test/basic_tests/vectors/storage.cpp b/sycl/test/basic_tests/vectors/storage.cpp new file mode 100644 index 0000000000000..bbb14cfe24227 --- /dev/null +++ b/sycl/test/basic_tests/vectors/storage.cpp @@ -0,0 +1,43 @@ +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1 +// expected-no-diagnostics + +#include + +#include + +namespace sycl { +namespace detail { +template class vec_base_test { +public: + static void do_check() { + constexpr bool uses_std_array = + std::is_same_v::DataType, std::array>; + constexpr bool uses_plain_array = + std::is_same_v::DataType, T[N]>; + + constexpr bool std_array_and_plain_array_have_the_same_layout = + sizeof(std::array) == sizeof(T[N]) && + alignof(std::array) == alignof(T[N]); + +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \ + __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE + static_assert(uses_plain_array, + "We must use plain array regardless of " + "layout, because user is opted-in for a potential ABI-break"); +#else + static_assert(std_array_and_plain_array_have_the_same_layout == + uses_plain_array, + "If layouts are the same, we must use safer plain array " + "instead of std::array, or vice versa"); + static_assert( + !std_array_and_plain_array_have_the_same_layout == uses_std_array, + "If layouts are not the same, we must use std::array to preserve ABI"); +#endif + } +}; +} // namespace detail +} // namespace sycl + +int main() { sycl::detail::vec_base_test::do_check(); } diff --git a/sycl/test/regression/vec_array_windows.cpp b/sycl/test/regression/vec_array_windows.cpp new file mode 100644 index 0000000000000..d9511de372f54 --- /dev/null +++ b/sycl/test/regression/vec_array_windows.cpp @@ -0,0 +1,23 @@ +// Test to isolate sycl::vec regression after +// https://github.com/intel/llvm/pull/14130. This PR caused sycl::vec to use +// std::array as its underlying storage. However, operations on std::array +// may emit debug-mode-only functions, on which the device compiler may fail. + +// REQUIRES: windows + +// RUN: %clangxx -fsycl -D_DEBUG %s -fsycl-device-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -D_DEBUG -fsycl-device-only %s %} + +#include + +// expected-no-diagnostics +// +// Our current implementation automatically opts-in for a new implementation if +// that is possible without breaking ABI. +// However, depending on the environment (used STL implementation, in +// particular) it may not be the case. Therefore, the lines below are kept for +// reference of how an error would look like in a problematic environment. +// not-expected-error@* {{SYCL kernel cannot call a variadic function}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +SYCL_EXTERNAL auto GetFirstElement(sycl::vec v) { return v[0]; } From 80a56402f82566e4cee367027f219a7b5ffb3c8f Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 27 Mar 2025 15:47:16 -0700 Subject: [PATCH 2/2] Resolve merge conflicts --- .../detail/type_traits/vec_marray_traits.hpp | 134 ------------------ sycl/include/sycl/vector.hpp | 4 +- .../vector/vector_bf16_builtins.cpp | 10 +- .../vector/vector_convert_bfloat.cpp | 8 +- .../vector/vector_math_ops.cpp | 90 ++++++------ 5 files changed, 56 insertions(+), 190 deletions(-) delete mode 100644 sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp deleted file mode 100644 index 6ce39bf6a072a..0000000000000 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ /dev/null @@ -1,134 +0,0 @@ -//==---------- Forward declarations and traits for vector/marray types -----==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include - -namespace sycl { -inline namespace _V1 { -template class __SYCL_EBO vec; - -template class marray; - -namespace detail { -template class OperationCurrentT, int... Indexes> -class SwizzleOp; - -// Utility for converting a swizzle to a vector or preserve the type if it isn't -// a swizzle. -template struct simplify_if_swizzle { - using type = T; -}; - -template class OperationCurrentT, int... Indexes> -struct simplify_if_swizzle> { - using type = vec; -}; - -template -using simplify_if_swizzle_t = typename simplify_if_swizzle::type; - -// --------- is_* traits ------------------ // -template struct is_vec : std::false_type {}; -template struct is_vec> : std::true_type {}; -template constexpr bool is_vec_v = is_vec::value; - -template -struct is_ext_vector : std::false_type {}; -template -struct is_valid_type_for_ext_vector : std::false_type {}; -#if defined(__has_extension) -#if __has_extension(attribute_ext_vector_type) -template -using ext_vector = T __attribute__((ext_vector_type(N))); -template -struct is_ext_vector> : std::true_type {}; -template -struct is_valid_type_for_ext_vector>> - : std::true_type {}; -#endif -#endif -template -inline constexpr bool is_ext_vector_v = is_ext_vector::value; -template -inline constexpr bool is_valid_type_for_ext_vector_v = - is_valid_type_for_ext_vector::value; - -template struct is_swizzle : std::false_type {}; -template class OperationCurrentT, int... Indexes> -struct is_swizzle> : std::true_type {}; -template constexpr bool is_swizzle_v = is_swizzle::value; - -template -constexpr bool is_vec_or_swizzle_v = is_vec_v || is_swizzle_v; - -template struct is_marray : std::false_type {}; -template -struct is_marray> : std::true_type {}; -template constexpr bool is_marray_v = is_marray::value; - -// --------- num_elements trait ------------------ // -template -struct num_elements : std::integral_constant {}; -template -struct num_elements> : std::integral_constant {}; -template -struct num_elements> - : std::integral_constant {}; -#if defined(__has_extension) -#if __has_extension(attribute_ext_vector_type) -template -struct num_elements - : std::integral_constant {}; -#endif -#endif -template class OperationCurrentT, int... Indexes> -struct num_elements> - : std::integral_constant {}; - -template -inline constexpr std::size_t num_elements_v = num_elements::value; - -// --------- element_type trait ------------------ // -template struct element_type { - using type = T; -}; -template struct element_type> { - using type = T; -}; -template struct element_type> { - using type = T; -}; -#if defined(__has_extension) -#if __has_extension(attribute_ext_vector_type) -template -struct element_type { - using type = T; -}; -#endif -#endif -template using element_type_t = typename element_type::type; - -template -inline constexpr bool is_allowed_vec_size_v = - N == 1 || N == 2 || N == 3 || N == 4 || N == 8 || N == 16; - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 1d7e2cdd44926..f71440c9015f0 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -426,7 +426,7 @@ class __SYCL_EBO vec typename vector_t_ = vector_t, typename = typename std::enable_if_t>> constexpr vec(vector_t_ openclVector) { - m_Data = sycl::bit_cast(openclVector); + sycl::detail::memcpy(&m_Data, &openclVector, sizeof(openclVector)); } /* @SYCL2020 @@ -540,7 +540,7 @@ class __SYCL_EBO vec if constexpr (canUseNativeVectorConvert) { auto val = detail::convertImpl(NativeVector); - Result.m_Data = sycl::bit_cast(val); + sycl::detail::memcpy(&Result.m_Data, &val, sizeof(Result)); } else #endif // __SYCL_DEVICE_ONLY__ { diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp index 517bcba4c3732..f4edcaba952dc 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp @@ -68,7 +68,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -122,7 +122,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -184,7 +184,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -220,7 +220,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index a4c9c185614a2..419e17edbf85b 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -62,7 +62,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -92,7 +92,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -190,7 +190,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -203,7 +203,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 678548e9524c9..2aaefbee6cfa4 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -44,7 +44,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META19]] @@ -57,7 +57,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META22:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META22:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA11]], !noalias [[META23]] @@ -71,7 +71,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.15") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.12") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META27]] @@ -84,11 +84,11 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ult i64 [[I_0_I_I]], 4 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLIBEENS0_3VECIBLI4EEERKS4_S6__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA11]], !alias.scope [[META27]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA11]], !alias.scope [[META27]] // CHECK-NEXT: [[CMP3_I_I:%.*]] = icmp ne i8 [[TMP2]], 0 // CHECK-NEXT: [[FROMBOOL_I_I:%.*]] = zext i1 [[CMP3_I_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META27]] +// CHECK-NEXT: store i8 [[FROMBOOL_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META27]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplIbEENS0_3vecIbLi4EEERKS4_S6_.exit: @@ -97,7 +97,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_used_aspects [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.16") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_used_aspects [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META37]] @@ -110,7 +110,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META40:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META40:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -123,17 +123,17 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 3 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECIS5_LI3EEERKS7_S9__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META44:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I10_I]]) #[[ATTR8]], !noalias [[META44]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META44:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I10_I]]) #[[ATTR8]], !noalias [[META44]] // CHECK-NEXT: [[ADD_I_I:%.*]] = fadd float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] // CHECK-NEXT: store float [[ADD_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA47:![0-9]+]], !noalias [[META44]] // CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR8]], !noalias [[META44]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I12_I]], align 2, !tbaa [[TBAA49:![0-9]+]], !alias.scope [[META41]] +// CHECK-NEXT: [[ARRAYIDX_I12_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I12_I]], align 2, !tbaa [[TBAA49:![0-9]+]], !alias.scope [[META41]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENS0_3vecIS5_Li3EEERKS7_S9_.exit: @@ -147,7 +147,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.30") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.24") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA11]], !noalias [[META53]] @@ -162,7 +162,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META56:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.28") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.28") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META56:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -176,7 +176,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA11]], !noalias [[META58]] @@ -191,7 +191,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.50") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META62]] @@ -206,7 +206,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META65:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.52") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.52") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META65:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) @@ -218,14 +218,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 4 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECISLI4EEERKNS6_IS5_LI4EEESA__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I13_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META66]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I13_I]]) #[[ATTR8]], !noalias [[META66]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I13_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META66]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I13_I]]) #[[ATTR8]], !noalias [[META66]] // CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i1 [[CMP_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I15_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV5_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I15_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META66]] +// CHECK-NEXT: [[ARRAYIDX_I15_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CONV5_I]], ptr addrspace(4) [[ARRAYIDX_I15_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META66]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENS0_3vecIsLi4EEERKNS6_IS5_Li4EEESA_.exit: @@ -239,7 +239,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.69") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.69") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.56") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META71]] @@ -247,13 +247,13 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <3 x i1> [[CMP_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I2_I:%.*]] = shufflevector <3 x i32> [[SEXT_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META71]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META71]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META74:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.60") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META74:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META75]] @@ -265,42 +265,42 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.78") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META78:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.64") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.64") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META78:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META79]] // CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[TMP0]], -// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META79]] +// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META79]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.83") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META82:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.68") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META82:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META83]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META83]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META83]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.88") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.93") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META86:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.72") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.76") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META86:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META87]] // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META87]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META87]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.55") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_used_aspects [[META35]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.44") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_used_aspects [[META35]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META91]] @@ -311,7 +311,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.98") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META94:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.80") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META94:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) @@ -322,12 +322,12 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 3 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META95]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META95]] // CHECK-NEXT: [[TOBOOL_I:%.*]] = fcmp oeq float [[CALL_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV3_I:%.*]] = sext i1 [[TOBOOL_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV3_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I10_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META95]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CONV3_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META95]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: @@ -336,7 +336,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META99:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.84") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META99:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -350,13 +350,13 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 16 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META100]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META100]] // CHECK-NEXT: [[FNEG_I:%.*]] = fneg float [[CALL_I_I_I]] // CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META100]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] // CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META100]] -// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] +// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP105:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: