Skip to content

Commit 26063cb

Browse files
Merge pull request #642 from tomjnixon/fix_avx2_cmp
improve code generated for AVX2 signed integer comparisons
2 parents c4e6447 + 7a7a51f commit 26063cb

File tree

2 files changed

+8
-46
lines changed

2 files changed

+8
-46
lines changed

include/xsimd/arch/xsimd_avx.hpp

-38
Original file line number | Diff line number | Diff line change
@@ -569,44 +569,6 @@ namespace xsimd
569569
return _mm256_floor_pd(self);
570570
}
571571

572-
// ge
573-
template <class A>
574-
inline batch_bool<float, A> ge(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>)
575-
{
576-
return _mm256_cmp_ps(self, other, _CMP_GE_OQ);
577-
}
578-
template <class A>
579-
inline batch_bool<double, A> ge(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>)
580-
{
581-
return _mm256_cmp_pd(self, other, _CMP_GE_OQ);
582-
}
583-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
584-
inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>)
585-
{
586-
return detail::fwd_to_sse([](__m128i s, __m128i o)
587-
{ return ge(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
588-
self, other);
589-
}
590-
591-
// gt
592-
template <class A>
593-
inline batch_bool<float, A> gt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>)
594-
{
595-
return _mm256_cmp_ps(self, other, _CMP_GT_OQ);
596-
}
597-
template <class A>
598-
inline batch_bool<double, A> gt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>)
599-
{
600-
return _mm256_cmp_pd(self, other, _CMP_GT_OQ);
601-
}
602-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
603-
inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>)
604-
{
605-
return detail::fwd_to_sse([](__m128i s, __m128i o)
606-
{ return gt(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
607-
self, other);
608-
}
609-
610572
// hadd
611573
template <class A>
612574
inline float hadd(batch<float, A> const& rhs, requires_arch<avx>)

include/xsimd/arch/xsimd_avx2.hpp

+8 -8
Original file line number | Diff line number | Diff line change
@@ -262,29 +262,29 @@ namespace xsimd
262262
}
263263
}
264264

265-
// gt
265+
// lt
266266
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
267-
inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>)
267+
inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>)
268268
{
269269
if (std::is_signed<T>::value)
270270
{
271271
switch (sizeof(T))
272272
{
273273
case 1:
274-
return _mm256_cmpgt_epi8(self, other);
274+
return _mm256_cmpgt_epi8(other, self);
275275
case 2:
276-
return _mm256_cmpgt_epi16(self, other);
276+
return _mm256_cmpgt_epi16(other, self);
277277
case 4:
278-
return _mm256_cmpgt_epi32(self, other);
278+
return _mm256_cmpgt_epi32(other, self);
279279
case 8:
280-
return _mm256_cmpgt_epi64(self, other);
280+
return _mm256_cmpgt_epi64(other, self);
281281
default:
282-
return gt(self, other, avx {});
282+
return lt(self, other, avx {});
283283
}
284284
}
285285
else
286286
{
287-
return gt(self, other, avx {});
287+
return lt(self, other, avx {});
288288
}
289289
}
290290

0 commit comments

Comments
 (0)