Skip to content

Commit e66da98

Browse files
committed
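Add a first() function that extracts the first lane from a batch. Dedicated kernels are implemented only for the x86_64 architectures (SSE2, AVX, AVX-512F); the common kernel falls back to get(self, 0).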
1 parent d92c6d4 commit e66da98

File tree

7 files changed

+223
-1
lines changed

7 files changed

+223
-1
lines changed

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,25 @@ namespace xsimd
260260
return buffer[i];
261261
}
262262

263+
// first
264+
template <class A, class T>
265+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<common>) noexcept
266+
{
267+
return get(self, 0, common {});
268+
}
269+
270+
template <class A, class T>
271+
XSIMD_INLINE T first(batch_bool<T, A> const& self, requires_arch<common>) noexcept
272+
{
273+
return get(self, 0, common {});
274+
}
275+
276+
template <class A, class T>
277+
XSIMD_INLINE auto first(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept -> typename batch<std::complex<T>, A>::value_type
278+
{
279+
return get(self, 0, common {});
280+
}
281+
263282
// load
264283
template <class A, class T>
265284
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<common>) noexcept

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
namespace xsimd
2222
{
23-
2423
namespace kernel
2524
{
2625
using namespace types;
@@ -1861,6 +1860,58 @@ namespace xsimd
18611860
auto hi = _mm256_unpackhi_pd(self, other);
18621861
return _mm256_insertf128_pd(lo, _mm256_castpd256_pd128(hi), 1);
18631862
}
1863+
1864+
// first
1865+
template <class A>
1866+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx>) noexcept
1867+
{
1868+
return _mm256_cvtss_f32(self);
1869+
}
1870+
1871+
template <class A>
1872+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx>) noexcept
1873+
{
1874+
return _mm256_cvtsd_f64(self);
1875+
}
1876+
1877+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1878+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx>) noexcept
1879+
{
1880+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1881+
{
1882+
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFF);
1883+
}
1884+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1885+
{
1886+
return static_cast<T>(_mm256_cvtsi256_si32(self) & 0xFFFF);
1887+
}
1888+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1889+
{
1890+
return static_cast<T>(_mm256_cvtsi256_si32(self));
1891+
}
1892+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1893+
{
1894+
__m128i low = _mm256_castsi256_si128(self);
1895+
return static_cast<T>(_mm_cvtsi128_si64(low));
1896+
}
1897+
else
1898+
{
1899+
assert(false && "unsupported arch/op combination");
1900+
return {};
1901+
}
1902+
}
1903+
1904+
template <class A, class T>
1905+
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& self, requires_arch<avx>) noexcept
1906+
{
1907+
return { first(self.real(), A {}), first(self.imag(), A {}) };
1908+
}
1909+
1910+
template <class A, class T>
1911+
XSIMD_INLINE bool first(batch_bool<T, A> const& self, requires_arch<avx>) noexcept
1912+
{
1913+
return first(batch<T, A>(self), A {});
1914+
}
18641915
}
18651916
}
18661917

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,6 +2339,57 @@ namespace xsimd
23392339
2));
23402340
}
23412341

2342+
// first
2343+
template <class A>
2344+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<avx512f>) noexcept
2345+
{
2346+
return _mm512_cvtss_f32(self);
2347+
}
2348+
2349+
template <class A>
2350+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<avx512f>) noexcept
2351+
{
2352+
return _mm512_cvtsd_f64(self);
2353+
}
2354+
2355+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2356+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<avx512f>) noexcept
2357+
{
2358+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
2359+
{
2360+
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFF);
2361+
}
2362+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
2363+
{
2364+
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFFFF);
2365+
}
2366+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
2367+
{
2368+
return static_cast<T>(_mm512_cvtsi512_si32(self));
2369+
}
2370+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
2371+
{
2372+
return static_cast<T>(_mm_cvtsi128_si64(_mm512_castsi512_si128(self)));
2373+
}
2374+
else
2375+
{
2376+
assert(false && "unsupported arch/op combination");
2377+
return {};
2378+
}
2379+
}
2380+
2381+
template <class A, class T>
2382+
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& self, requires_arch<avx512f>) noexcept
2383+
{
2384+
return { first(self.real(), A {}), first(self.imag(), A {}) };
2385+
}
2386+
2387+
template <class A, class T>
2388+
XSIMD_INLINE bool first(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
2389+
{
2390+
return first(batch<T, A>(self), A {});
2391+
}
2392+
23422393
}
23432394

23442395
}

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,57 @@ namespace xsimd
17821782
{
17831783
return _mm_unpacklo_pd(self, other);
17841784
}
1785+
1786+
// first
1787+
template <class A>
1788+
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
1789+
{
1790+
return _mm_cvtss_f32(self);
1791+
}
1792+
1793+
template <class A>
1794+
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
1795+
{
1796+
return _mm_cvtsd_f64(self);
1797+
}
1798+
1799+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1800+
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
1801+
{
1802+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1803+
{
1804+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
1805+
}
1806+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1807+
{
1808+
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
1809+
}
1810+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1811+
{
1812+
return static_cast<T>(_mm_cvtsi128_si32(self));
1813+
}
1814+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1815+
{
1816+
return static_cast<T>(_mm_cvtsi128_si64(self));
1817+
}
1818+
else
1819+
{
1820+
assert(false && "unsupported arch/op combination");
1821+
return {};
1822+
}
1823+
}
1824+
1825+
template <class A, class T>
1826+
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& self, requires_arch<sse2>) noexcept
1827+
{
1828+
return { first(self.real(), A {}), first(self.imag(), A {}) };
1829+
}
1830+
1831+
template <class A, class T>
1832+
XSIMD_INLINE bool first(batch_bool<T, A> const& self, requires_arch<sse2>) noexcept
1833+
{
1834+
return first(batch<T, A>(self), A {});
1835+
}
17851836
}
17861837
}
17871838

include/xsimd/types/xsimd_batch.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ namespace xsimd
159159

160160
XSIMD_INLINE T get(std::size_t i) const noexcept;
161161

162+
// Returns the first scalar element of this batch.
XSIMD_INLINE T first() const noexcept;
163+
162164
// comparison operators. Defined as friend to enable automatic
163165
// conversion of parameters from scalar to batch, at the cost of using a
164166
// proxy implementation from details::.
@@ -314,6 +316,8 @@ namespace xsimd
314316

315317
XSIMD_INLINE bool get(std::size_t i) const noexcept;
316318

319+
// Returns the first element of this boolean batch as a bool.
XSIMD_INLINE bool first() const noexcept;
320+
317321
// mask operations
318322
XSIMD_INLINE uint64_t mask() const noexcept;
319323
XSIMD_INLINE static batch_bool from_mask(uint64_t mask) noexcept;
@@ -405,6 +409,8 @@ namespace xsimd
405409

406410
XSIMD_INLINE value_type get(std::size_t i) const noexcept;
407411

412+
// Returns the first element of this complex batch as a value_type.
XSIMD_INLINE value_type first() const noexcept;
413+
408414
#ifdef XSIMD_ENABLE_XTL_COMPLEX
409415
// xtl-related methods
410416
template <bool i3ec>
@@ -693,6 +699,16 @@ namespace xsimd
693699
return kernel::get(*this, i, A {});
694700
}
695701

702+
/**
703+
* Retrieve the first scalar element in this batch.
704+
*/
705+
template <class T, class A>
706+
XSIMD_INLINE T batch<T, A>::first() const noexcept
707+
{
708+
detail::static_check_supported_config<T, A>();
709+
return kernel::first(*this, A {});
710+
}
711+
696712
/******************************
697713
* batch comparison operators *
698714
******************************/
@@ -1005,6 +1021,13 @@ namespace xsimd
10051021
return kernel::get(*this, i, A {});
10061022
}
10071023

1024+
template <class T, class A>
1025+
XSIMD_INLINE bool batch_bool<T, A>::first() const noexcept
1026+
{
1027+
detail::static_check_supported_config<T, A>();
1028+
return kernel::first(*this, A {});
1029+
}
1030+
10081031
/***********************************
10091032
* batch_bool comparison operators *
10101033
***********************************/
@@ -1077,6 +1100,7 @@ namespace xsimd
10771100
{
10781101
}
10791102

1103+
10801104
template <class T, class A>
10811105
template <class U, class... V, size_t I, size_t... Is>
10821106
XSIMD_INLINE auto batch_bool<T, A>::make_register(detail::index_sequence<I, Is...>, U u, V... v) noexcept -> register_type
@@ -1248,6 +1272,13 @@ namespace xsimd
12481272
return kernel::get(*this, i, A {});
12491273
}
12501274

1275+
template <class T, class A>
1276+
XSIMD_INLINE auto batch<std::complex<T>, A>::first() const noexcept -> value_type
1277+
{
1278+
detail::static_check_supported_config<std::complex<T>, A>();
1279+
return kernel::first(*this, A {});
1280+
}
1281+
12511282
/**************************************
12521283
* batch<complex> xtl-related methods *
12531284
**************************************/

test/test_batch.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ struct batch_test
152152
}
153153
}
154154

155+
// Checks that first() on the batch returned by batch_lhs() compares equal
// to element 0 of the lhs reference data.
void test_first_element() const
156+
{
157+
batch_type res = batch_lhs();
158+
CHECK_EQ(res.first(), lhs[0]);
159+
}
160+
155161
void test_arithmetic() const
156162
{
157163
// +batch
@@ -934,6 +940,11 @@ TEST_CASE_TEMPLATE("[batch]", B, BATCH_TYPES)
934940
Test.test_access_operator();
935941
}
936942

943+
SUBCASE("first element")
944+
{
945+
Test.test_first_element();
946+
}
947+
937948
SUBCASE("arithmetic")
938949
{
939950
Test.test_arithmetic();

test/test_batch_complex.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ struct batch_complex_test
176176
}
177177
}
178178

179+
// Checks that first() on the complex batch returned by batch_lhs() compares
// equal to element 0 of the lhs reference data.
void test_first_element() const
180+
{
181+
batch_type res = batch_lhs();
182+
CHECK_EQ(res.first(), lhs[0]);
183+
}
184+
179185
void test_arithmetic() const
180186
{
181187
// +batch
@@ -675,6 +681,8 @@ TEST_CASE_TEMPLATE("[xsimd complex batches]", B, BATCH_COMPLEX_TYPES)
675681

676682
SUBCASE("access_operator") { Test.test_access_operator(); }
677683

684+
SUBCASE("first element") { Test.test_first_element(); }
685+
678686
SUBCASE("arithmetic") { Test.test_arithmetic(); }
679687

680688
SUBCASE("computed_assignment") { Test.test_computed_assignment(); }

0 commit comments

Comments
 (0)