Skip to content

Commit 64139c5

Browse files
junparser authored and serge-sans-paille committed
Add slide_left&slide_right for avx512vbmi
1 parent 5421aba commit 64139c5

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

include/xsimd/arch/xsimd_avx512vbmi.hpp

+60
Original file line number | Diff line number | Diff line change
@@ -17,4 +17,64 @@
1717

1818
#include "../types/xsimd_avx512vbmi_register.hpp"
1919

20+
namespace xsimd
21+
{
22+
23+
namespace kernel
24+
{
25+
using namespace types;
26+
27+
namespace detail
28+
{
29+
template <size_t N, size_t... Is>
30+
constexpr std::array<uint8_t, sizeof...(Is)> make_slide_left_bytes_pattern(::xsimd::detail::index_sequence<Is...>)
31+
{
32+
return { (Is >= N ? Is - N : 0)... };
33+
}
34+
35+
template <size_t N, size_t... Is>
36+
constexpr std::array<uint8_t, sizeof...(Is)> make_slide_right_bytes_pattern(::xsimd::detail::index_sequence<Is...>)
37+
{
38+
return { (Is < (64 - N) ? Is + N : 0)... };
39+
}
40+
}
41+
42+
// slide_left
43+
template <size_t N, class A, class T>
44+
XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<avx512vbmi>) noexcept
45+
{
46+
if (N == 0)
47+
{
48+
return x;
49+
}
50+
if (N >= 64)
51+
{
52+
return batch<T, A>(T(0));
53+
}
54+
55+
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull << (N & 63);
56+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_left_bytes_pattern<N>(::xsimd::detail::make_index_sequence<512 / 8>());
57+
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
58+
}
59+
60+
// slide_right
61+
template <size_t N, class A, class T>
62+
XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<avx512vbmi>) noexcept
63+
{
64+
if (N == 0)
65+
{
66+
return x;
67+
}
68+
if (N >= 64)
69+
{
70+
return batch<T, A>(T(0));
71+
}
72+
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull >> (N & 63);
73+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_right_bytes_pattern<N>(::xsimd::detail::make_index_sequence<512 / 8>());
74+
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
75+
}
76+
77+
}
78+
}
79+
2080
#endif

0 commit comments

Comments
 (0)