@@ -293,7 +293,7 @@ bool board_lesser(const Board *b1, const Board *b2)
293
293
* @param b the bitboard
294
294
* @return the mirrored 128-bit bitboard
295
295
*/
296
- static __m128i horizontal_mirror_mm (const __m128i b )
296
+ static vectorcall __m128i horizontal_mirror_mm (const __m128i b )
297
297
{
298
298
const __m128i mask = _mm_set1_epi16 (0x0F0F );
299
299
const __m128i rev = _mm_set_epi8 (H_MIRROR );
@@ -306,7 +306,7 @@ static __m128i horizontal_mirror_mm(const __m128i b)
306
306
* @param b the bitboard
307
307
* @return the mirrored 128-bit bitboard
308
308
*/
309
- static __m128i vertical_mirror_mm (const __m128i b )
309
+ static vectorcall __m128i vertical_mirror_mm (const __m128i b )
/* The removed and added signature lines differ only by the vectorcall qualifier. */
310
310
{
311
311
/* Rearranges the bytes of b with one byte shuffle. The shuffle control vector
 * is built from the V_MIRROR macro, which is defined outside this excerpt
 * (presumably a list of 16 byte indices -- confirm against its definition). */
return _mm_shuffle_epi8 (b , _mm_set_epi8 (V_MIRROR ));
312
312
}
@@ -317,7 +317,7 @@ static __m128i vertical_mirror_mm(const __m128i b)
317
317
* @param b the bitboard
318
318
* @return the transposed 128-bit bitboard
319
319
*/
320
- static __m128i transpose_mm (__m128i b )
320
+ static vectorcall __m128i transpose_mm (__m128i b )
321
321
{
322
322
const __m128i mask00AA = _mm_set1_epi16 (0x00AA );
323
323
const __m128i maskCCCC = _mm_set1_epi32 (0x0000CCCC );
@@ -342,7 +342,7 @@ static __m128i transpose_mm(__m128i b)
342
342
* @param b input boards
343
343
* @param sym output boards
344
344
*/
345
- static void horizontal_mirror_avx2 (const __m256i * b , __m256i * sym )
345
+ static vectorcall void horizontal_mirror_avx2 (const __m256i * b , __m256i * sym )
346
346
{
347
347
const __m256i mask = _mm256_set1_epi16 (0x0F0F );
348
348
const __m256i rev = _mm256_set_epi8 (H_MIRROR , H_MIRROR );
@@ -356,7 +356,7 @@ static void horizontal_mirror_avx2(const __m256i *b, __m256i *sym)
356
356
* @param b input boards
357
357
* @param sym output boards
358
358
*/
359
- static void vertical_mirror_avx2 (const __m256i * b , __m256i * sym )
359
+ static vectorcall void vertical_mirror_avx2 (const __m256i * b , __m256i * sym )
360
360
{
361
361
const __m256i mask = _mm256_set_epi8 ( V_MIRROR , V_MIRROR );
362
362
* sym = _mm256_shuffle_epi8 (* b , mask );
@@ -1273,7 +1273,7 @@ uint64_t get_full_lines(const uint64_t disc, uint64_t full[4])
1273
1273
// too slow on zen3 cpu
1274
1274
#if 0 && USE_SIMD && defined(__AVX2__ )
1275
1275
1276
- static uint64_t get_stable_by_contact (const uint64_t central_mask , const uint64_t previous_stable , const __m256i full )
1276
+ static uint64_t vectorcall get_stable_by_contact (const uint64_t central_mask , const uint64_t previous_stable , const __m256i full )
1277
1277
{
1278
1278
__m128i stable , old_stable , central_mask_v2 ;
1279
1279
__m256i stable_v4 ;
0 commit comments