diff --git a/interface/gemv.c b/interface/gemv.c index d031339463..34b6addd3d 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT #if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { - return - MN < 25600L ? 1 - : MN < 63001L ? MIN(ncpu, 4) - : MN < 459684L ? MIN(ncpu, 16) - : ncpu; + #ifdef DOUBLE + return (MN < 8100L) ? 1 + : (MN < 12100L) ? MIN(ncpu, 2) + : (MN < 36100L) ? MIN(ncpu, 4) + : (MN < 84100L) ? MIN(ncpu, 8) + : (MN < 348100L) ? MIN(ncpu, 16) + : (MN < 435600L) ? MIN(ncpu, 24) + : (MN < 810000L) ? MIN(ncpu, 32) + : (MN < 1050625L) ? MIN(ncpu, 40) + : ncpu; + #else + return (MN < 25600L) ? 1 + : (MN < 63001L) ? MIN(ncpu, 4) + : (MN < 459684L) ? MIN(ncpu, 16) + : ncpu; + #endif } #endif @@ -96,11 +107,11 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) { return num_cpu_avail(4); return 1; #endif -#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) +#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); #elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); -#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) +#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16) if (strcmp(gotoblas_corename(), "neoversev1") == 0) { return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); }