Skip to content

Commit 3b54ace

Browse files
committed
Add function for x86 architecture
1 parent c9bc549 commit 3b54ace

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

source/Platform/x86/AVX2.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,19 @@ class AVX2Vector {
102102
return AVX2Vector(_mm256_andnot_si256(rhs.vector, vector));
103103
}
104104

105+
#ifdef CPU_FEATURES_ARCH_X86_64
105106
inline uint16_t sum() const {
106107
__m256i sum256 = _mm256_sad_epu8(vector, _mm256_setzero_si256());
107108
__m128i sum128 = _mm_add_epi64(_mm256_extractf128_si256(sum256, 1), _mm256_castsi256_si128(sum256));
108109
return _mm_extract_epi64(sum128, 0) + _mm_extract_epi64(sum128, 1);
109110
}
111+
#else
112+
inline uint16_t sum() const {
113+
__m256i sum256 = _mm256_sad_epu8(vector, _mm256_setzero_si256());
114+
__m128i sum128 = _mm_add_epi64(_mm256_extractf128_si256(sum256, 1), _mm256_castsi256_si128(sum256));
115+
return _mm_extract_epi32(sum128, 0) + _mm_extract_epi32(sum128, 1);
116+
}
117+
#endif
110118

111119
// Resets the wrapped 256-bit register so that every bit is zero.
inline void clear() {
    vector = _mm256_setzero_si256();
}
112120
};

0 commit comments

Comments
 (0)