@@ -89,6 +89,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
89
89
}
90
90
#endif
91
91
92
+ //thread throttling for dgemv
93
+ #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
94
+ static inline int get_dgemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
95
+
96
+ return
97
+ MN < 8100L ? 1
98
+ : MN < 12100L ? MIN (ncpu , 2 )
99
+ : MN < 36100L ? MIN (ncpu , 4 )
100
+ : MN < 84100L ? MIN (ncpu , 8 )
101
+ : MN < 348100L ? MIN (ncpu , 16 )
102
+ : MN < 435600L ? MIN (ncpu , 24 )
103
+ : MN < 810000L ? MIN (ncpu , 32 )
104
+ : MN < 1050625 ? MIN (ncpu , 40 )
105
+ : ncpu ;
106
+
107
+ }
108
+ #endif
109
+
92
110
static inline int get_gemv_optimal_nthreads (BLASLONG MN ) {
93
111
int ncpu = num_cpu_avail (3 );
94
112
#if defined(_WIN64 ) && defined(_M_ARM64 )
@@ -98,6 +116,8 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
98
116
#endif
99
117
#if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
100
118
return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
119
+ #elif defined(NEOVERSEV1 ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
120
+ return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
101
121
#elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
102
122
return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
103
123
#elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
0 commit comments