@@ -107,9 +107,11 @@ class AVX2Vector {
107
107
__m256i sum256 = _mm256_sad_epu8 (vector, _mm256_setzero_si256 ());
108
108
__m128i sum128 = _mm_add_epi64 (_mm256_extractf128_si256 (sum256, 1 ), _mm256_castsi256_si128 (sum256));
109
109
uint16_t res32 = _mm_extract_epi32 (sum128, 0 ) + _mm_extract_epi32 (sum128, 1 );
110
+ uint16_t res32_2 = _mm_extract_epi32 (sum128, 0 ) + _mm_extract_epi32 (sum128, 2 );
110
111
uint16_t res64 = _mm_extract_epi64 (sum128, 0 ) + _mm_extract_epi64 (sum128, 1 );
111
- if (res32 = = res64) {
112
+ if (res32 ! = res64) {
112
113
std::cout << " uint16_t res32 = " << res32 << " \n " ;
114
+ std::cout << " uint16_t res32_2 = " << res32_2 << " \n " ;
113
115
std::cout << " uint16_t res64 = " << res64 << " \n " ;
114
116
exit (1 );
115
117
}
@@ -120,14 +122,6 @@ class AVX2Vector {
120
122
// Horizontal sum of the unsigned 8-bit elements held in `vector`, returned
// as a uint16_t. With 32 bytes of at most 255 each the total is at most
// 8160, so it fits in 16 bits.
// NOTE(review): assumes `vector` is the class's __m256i member (it is passed
// straight to _mm256_sad_epu8) -- confirm against the class definition.
// NOTE(review): this text is a diff view; the bare numbers are old/new line
// numbers and lines starting with '-' are marked as removed by the diff.
inline uint16_t sum () const {
121
123
// Sum of absolute differences against zero: each of the four 64-bit lanes
// receives the sum of its own 8 unsigned bytes.
__m256i sum256 = _mm256_sad_epu8 (vector, _mm256_setzero_si256 ());
122
124
// Fold the upper 128-bit half onto the lower half, leaving two 64-bit
// partial sums in sum128.
__m128i sum128 = _mm_add_epi64 (_mm256_extractf128_si256 (sum256, 1 ), _mm256_castsi256_si128 (sum256));
123
// Removed debug check: it compared a 32-bit extraction at indices 0 and 1
// with the 64-bit extraction at indices 0 and 1, and exited after printing
// both when they were equal.
- uint16_t res32 = _mm_extract_epi32 (sum128, 0 ) + _mm_extract_epi32 (sum128, 1 );
124
- uint16_t res64 = _mm_extract_epi64 (sum128, 0 ) + _mm_extract_epi64 (sum128, 1 );
125
- if (res32 == res64) {
126
- std::cout << " uint16_t res32 = " << res32 << " \n " ;
127
- std::cout << " uint16_t res64 = " << res64 << " \n " ;
128
- exit (1 );
129
- }
130
-
131
125
// 32-bit elements 0 and 2 are the low halves of the two 64-bit lanes, so
// adding them combines both partial sums. Element 1 is the high half of
// lane 0, not the second partial sum.
return _mm_extract_epi32 (sum128, 0 ) + _mm_extract_epi32 (sum128, 2 );
132
126
}
133
127
#endif
0 commit comments