Skip to content

Commit 7a7a51f

Browse files
committed
improve code generated for AVX2 signed integer comparisons
Previously, AVX2 gt was implemented, but lt fell back to the AVX lt, which is implemented with SSE2 instructions. generic_logical has the following mappings: - gt -> lt - ge -> le - le -> lt || eq, so it's best to implement just eq, lt, and le when they are available.
1 parent c4e6447 commit 7a7a51f

File tree

2 files changed

+8
-46
lines changed

2 files changed

+8
-46
lines changed

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -569,44 +569,6 @@ namespace xsimd
569569
return _mm256_floor_pd(self);
570570
}
571571

572-
// ge
573-
template <class A>
574-
inline batch_bool<float, A> ge(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>)
575-
{
576-
return _mm256_cmp_ps(self, other, _CMP_GE_OQ);
577-
}
578-
template <class A>
579-
inline batch_bool<double, A> ge(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>)
580-
{
581-
return _mm256_cmp_pd(self, other, _CMP_GE_OQ);
582-
}
583-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
584-
inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>)
585-
{
586-
return detail::fwd_to_sse([](__m128i s, __m128i o)
587-
{ return ge(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
588-
self, other);
589-
}
590-
591-
// gt
592-
template <class A>
593-
inline batch_bool<float, A> gt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>)
594-
{
595-
return _mm256_cmp_ps(self, other, _CMP_GT_OQ);
596-
}
597-
template <class A>
598-
inline batch_bool<double, A> gt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>)
599-
{
600-
return _mm256_cmp_pd(self, other, _CMP_GT_OQ);
601-
}
602-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
603-
inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>)
604-
{
605-
return detail::fwd_to_sse([](__m128i s, __m128i o)
606-
{ return gt(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
607-
self, other);
608-
}
609-
610572
// hadd
611573
template <class A>
612574
inline float hadd(batch<float, A> const& rhs, requires_arch<avx>)

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -262,29 +262,29 @@ namespace xsimd
262262
}
263263
}
264264

265-
// gt
265+
// lt
266266
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
267-
inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>)
267+
inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>)
268268
{
269269
if (std::is_signed<T>::value)
270270
{
271271
switch (sizeof(T))
272272
{
273273
case 1:
274-
return _mm256_cmpgt_epi8(self, other);
274+
return _mm256_cmpgt_epi8(other, self);
275275
case 2:
276-
return _mm256_cmpgt_epi16(self, other);
276+
return _mm256_cmpgt_epi16(other, self);
277277
case 4:
278-
return _mm256_cmpgt_epi32(self, other);
278+
return _mm256_cmpgt_epi32(other, self);
279279
case 8:
280-
return _mm256_cmpgt_epi64(self, other);
280+
return _mm256_cmpgt_epi64(other, self);
281281
default:
282-
return gt(self, other, avx {});
282+
return lt(self, other, avx {});
283283
}
284284
}
285285
else
286286
{
287-
return gt(self, other, avx {});
287+
return lt(self, other, avx {});
288288
}
289289
}
290290

0 commit comments

Comments
 (0)