Skip to content

Commit 6898e4f

Browse files
authored
Feature/avx512 support (#30)
* Initial implementation of 16x parallel support for SHA256 hashing on AVX512 * Updated tests * Add support for detection of AVX512 capabilities * Add Write support for arbitrary blocks and proper length adding for Sum * Fix test and remove formatting * Remove old comments * Cache final digest on client * Updated version with more optimized assembly listing/formatting
1 parent c985c18 commit 6898e4f

5 files changed

+2064
-44
lines changed

cpuid.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package sha256
1717

1818
// SIMD capability flags. Each one is assigned once, at package
// initialization, from the matching haveXxx probe and is true when that
// instruction set is available.
19+
var avx512 = haveAVX512()
1920
var avx2 = haveAVX2()
2021
var avx = haveAVX()
2122
var ssse3 = haveSSSE3()
@@ -46,6 +47,43 @@ func haveAVX2() bool {
4647
return false
4748
}
4849

50+
// haveAVX512 returns true when there is AVX512 support.
//
// It reports true only if all of the following hold: CPUID leaf 7 is
// available, the XSAVE/OSXSAVE bits are set, the OS has enabled the XMM, YMM,
// opmask and ZMM register state in XCR0, and the CPU advertises the AVX512F,
// AVX512DQ, AVX512BW and AVX512VL feature bits. In every other case it
// returns false.
51+
func haveAVX512() bool {
52+
mfi, _, _, _ := cpuid(0)
53+
54+
// The AVX-512 feature bits are read from CPUID leaf 7 below, so that leaf
// must be supported (mfi is presumably the highest supported leaf index).
55+
if mfi >= 7 {
56+
_, _, c, _ := cpuid(1)
57+
58+
// Only detect AVX-512 features if XGETBV is supported
// (bits 26 and 27 must both be set; per the Intel SDM these are the XSAVE
// and OSXSAVE flags, assuming c holds CPUID.1:ECX).
59+
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
60+
// Check for OS support
61+
eax, _ := xgetbv(0)
62+
_, ebx, _, _ := cpuidex(7, 0)
63+
64+
// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
65+
// ZMM16-ZMM31 state are enabled by OS)
66+
// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
67+
if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
68+
if ebx&(1<<16) == 0 {
69+
return false // no AVX512F
70+
}
71+
if ebx&(1<<17) == 0 {
72+
return false // no AVX512DQ
73+
}
74+
if ebx&(1<<30) == 0 {
75+
return false // no AVX512BW
76+
}
77+
if ebx&(1<<31) == 0 {
78+
return false // no AVX512VL
79+
}
80+
return true
81+
}
82+
}
83+
}
84+
return false
85+
}
86+
4987
// haveSSSE3 returns true when there is SSSE3 support
5088
func haveSSSE3() bool {
5189

0 commit comments

Comments
 (0)