Skip to content

Commit a744897

Browse files
shubham.chaudhari
shubham.chaudhari
authored and
shubham.chaudhari
committed
Add thread throttling profile for DGEMV on NEOVERSEV1
1 parent 37b8547 commit a744897

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

interface/gemv.c

+20
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
8989
}
9090
#endif
9191

92+
// Thread throttling for DGEMV (double-precision real GEMV) on Neoverse V1
93+
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
94+
static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
95+
96+
return
97+
MN < 8100L ? 1
98+
: MN < 12100L ? MIN(ncpu, 2)
99+
: MN < 36100L ? MIN(ncpu, 4)
100+
: MN < 84100L ? MIN(ncpu, 8)
101+
: MN < 348100L ? MIN(ncpu, 16)
102+
: MN < 435600L ? MIN(ncpu, 24)
103+
: MN < 810000L ? MIN(ncpu, 32)
104+
: MN < 1050625 ? MIN(ncpu, 40)
105+
: ncpu;
106+
107+
}
108+
#endif
109+
92110
static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
93111
int ncpu = num_cpu_avail(3);
94112
#if defined(_WIN64) && defined(_M_ARM64)
@@ -98,6 +116,8 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
98116
#endif
99117
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
100118
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
119+
#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16)
120+
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu);
101121
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
102122
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
103123
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)

0 commit comments

Comments
 (0)