Skip to content

Commit 96edf03

Browse files
dimula73serge-sans-paille
authored and committed
Fix detection of SSE/AVX/AVX512 when they are explicitly disabled by OS
Some CPU vulnerability mitigations may disable AVX functionality at the hardware level via the XCR0 register. We should check that register manually to verify that the OS actually allows us to use these features. See https://bugs.kde.org/show_bug.cgi?id=484622. Fixes #1025.
1 parent 3294464 commit 96edf03

File tree

1 file changed

+72
-19
lines changed

1 file changed

+72
-19
lines changed

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,35 @@ namespace xsimd
114114
#endif
115115

116116
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
117+
118+
// Reads extended control register 0 (XCR0) with the XGETBV instruction and
// returns its low 32 bits. XGETBV is only usable once the OS has enabled it;
// the visible caller checks the CPUID OSXSAVE bit before invoking this.
auto get_xcr0_low = []() noexcept
{
#if defined(_MSC_VER) && _MSC_VER >= 1400

    // Compiler intrinsic: index 0 selects XCR0; keep only the low half.
    return static_cast<uint32_t>(_xgetbv(0));

#elif defined(__GNUC__)

    uint32_t low_half;
    uint32_t high_half;
    // ECX carries the register index (0 -> XCR0); the value comes back in
    // EDX:EAX. Binding the registers through constraints lets the compiler
    // zero ECX itself and removes the need for per-architecture clobbers.
    __asm__("xgetbv"
            : "=a"(low_half), "=d"(high_half)
            : "c"(0));
    (void)high_half; // the upper 32 bits are intentionally discarded
    return low_half;

#else /* _MSC_VER < 1400 */
#error "_MSC_VER < 1400 is not supported"
#endif /* _MSC_VER && _MSC_VER >= 1400 */
};
145+
117146
auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
118147
{
119148

@@ -148,43 +177,67 @@ namespace xsimd
148177

149178
get_cpuid(regs1, 0x1);
150179

151-
sse2 = regs1[3] >> 26 & 1;
152-
sse3 = regs1[2] >> 0 & 1;
153-
ssse3 = regs1[2] >> 9 & 1;
154-
sse4_1 = regs1[2] >> 19 & 1;
155-
sse4_2 = regs1[2] >> 20 & 1;
156-
fma3_sse42 = regs1[2] >> 12 & 1;
180+
// OS can explicitly disable the usage of SSE/AVX extensions
181+
// by clearing the corresponding state-enable bit in the XCR0 register
182+
//
183+
// https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
184+
185+
unsigned sse_state_os_enabled = 1;
186+
unsigned avx_state_os_enabled = 1;
187+
unsigned avx512_state_os_enabled = 1;
188+
189+
// OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
190+
// 18] to enable XSETBV/XGETBV instructions to access XCR0 and
191+
// to support processor extended state management using
192+
// XSAVE/XRSTOR.
193+
bool osxsave = regs1[2] >> 27 & 1;
194+
if (osxsave)
195+
{
196+
197+
uint32_t xcr0 = get_xcr0_low();
198+
199+
sse_state_os_enabled = xcr0 >> 1 & 1;
200+
avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
201+
avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
202+
}
203+
204+
sse2 = regs1[3] >> 26 & sse_state_os_enabled;
205+
sse3 = regs1[2] >> 0 & sse_state_os_enabled;
206+
ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
207+
sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
208+
sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
209+
fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
157210

158-
avx = regs1[2] >> 28 & 1;
211+
avx = regs1[2] >> 28 & avx_state_os_enabled;
159212
fma3_avx = avx && fma3_sse42;
160213

161214
int regs8[4];
162215
get_cpuid(regs8, 0x80000001);
163-
fma4 = regs8[2] >> 16 & 1;
216+
fma4 = regs8[2] >> 16 & avx_state_os_enabled;
164217

165218
// sse4a = regs[2] >> 6 & 1;
166219

167220
// xop = regs[2] >> 11 & 1;
168221

169222
int regs7[4];
170223
get_cpuid(regs7, 0x7);
171-
avx2 = regs7[1] >> 5 & 1;
224+
avx2 = regs7[1] >> 5 & avx_state_os_enabled;
172225

173226
int regs7a[4];
174227
get_cpuid(regs7a, 0x7, 0x1);
175-
avxvnni = regs7a[0] >> 4 & 1;
228+
avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
176229

177230
fma3_avx2 = avx2 && fma3_sse42;
178231

179-
avx512f = regs7[1] >> 16 & 1;
180-
avx512cd = regs7[1] >> 28 & 1;
181-
avx512dq = regs7[1] >> 17 & 1;
182-
avx512bw = regs7[1] >> 30 & 1;
183-
avx512er = regs7[1] >> 27 & 1;
184-
avx512pf = regs7[1] >> 26 & 1;
185-
avx512ifma = regs7[1] >> 21 & 1;
186-
avx512vbmi = regs7[2] >> 1 & 1;
187-
avx512vnni_bw = regs7[2] >> 11 & 1;
232+
avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
233+
avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
234+
avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
235+
avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
236+
avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
237+
avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
238+
avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
239+
avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
240+
avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
188241
avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
189242
#endif
190243
}

0 commit comments

Comments
 (0)