Skip to content

Commit 39c5b77

Browse files
committed
[runtime] fix tbm detection
1 parent d48a21e commit 39c5b77

File tree

4 files changed

+39
-11
lines changed

4 files changed

+39
-11
lines changed

src/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,7 @@ mod v16 {
184184
define_ty! { i8x2, i8, i8 }
185185
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
186186

187-
define_casts!(
188-
(i8x2, i64x2, as_i64x2)
189-
);
187+
define_casts!((i8x2, i64x2, as_i64x2));
190188
}
191189

192190
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]

src/x86/runtime.rs

+25-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,31 @@ fn detect_features() -> usize {
211211
value = set_bit(value, __Feature::sse4_2 as u32);
212212
}
213213
if test_bit(proc_info_ecx, 21) {
214-
value = set_bit(value, __Feature::tbm as u32);
214+
// The TBM bit is only meaningfully set in AMD CPUs:
215+
let ebx: u32;
216+
let edx: u32;
217+
let ecx: u32;
218+
// EAX = 0: Basic Information. The vendor ID is stored in 12 u8 ascii
219+
// chars, returned in EBX, EDX, and ECX (in that order):
220+
unsafe {
221+
asm!("cpuid"
222+
: "={ebx}"(ebx), "={ecx}"(ecx), "={edx}"(edx)
223+
: "{eax}"(0x0000_0000_u32), "{ecx}"(0 as u32)
224+
: :);
225+
}
226+
let ebx: [u8; 4] = unsafe { ::std::mem::transmute(ebx) };
227+
let edx: [u8; 4] = unsafe { ::std::mem::transmute(edx) };
228+
let ecx: [u8; 4] = unsafe { ::std::mem::transmute(ecx) };
229+
#[cfg_attr(rustfmt, rustfmt_skip)]
230+
let vendor_id = [
231+
ebx[0], ebx[1], ebx[2], ebx[3],
232+
edx[0], edx[1], edx[2], edx[3],
233+
ecx[0], ecx[1], ecx[2], ecx[3],
234+
];
235+
let vendor_id_amd = b"AuthenticAMD";
236+
if vendor_id == *vendor_id_amd {
237+
value = set_bit(value, __Feature::tbm as u32);
238+
}
215239
}
216240
if test_bit(proc_info_ecx, 23) {
217241
value = set_bit(value, __Feature::popcnt as u32);

src/x86/sse2.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
17921792
#[inline(always)]
17931793
#[target_feature = "+sse2"]
17941794
#[cfg_attr(test, assert_instr(cvtsd2si))]
1795-
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
1795+
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
1796+
_mm_cvtsd_si64(a)
1797+
}
17961798

17971799
/// Convert the lower double-precision (64-bit) floating-point element in `b`
17981800
/// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
18571859
#[inline(always)]
18581860
#[target_feature = "+sse2"]
18591861
#[cfg_attr(test, assert_instr(cvttsd2si))]
1860-
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
1862+
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
1863+
_mm_cvttsd_si64(a)
1864+
}
18611865

18621866
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
18631867
/// packed 32-bit integers with truncation.

src/x86/sse41.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
244244
pmaxud(a, b)
245245
}
246246

247-
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers using unsigned saturation
247+
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
248+
/// using unsigned saturation
248249
#[inline(always)]
249250
#[target_feature = "+sse4.1"]
250251
#[cfg_attr(test, assert_instr(packusdw))]
@@ -276,7 +277,8 @@ pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
276277
simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
277278
}
278279

279-
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed 64-bit integers
280+
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
281+
/// 64-bit integers
280282
#[inline(always)]
281283
#[target_feature = "+sse4.1"]
282284
#[cfg_attr(test, assert_instr(pmovsxbq))]
@@ -800,7 +802,7 @@ mod tests {
800802
assert_eq!(r, e);
801803
}
802804

803-
#[simd_test = "sse4.1"]
805+
#[simd_test = "sse4.1"]
804806
unsafe fn _mm_cvtepi8_epi16() {
805807
let a = i8x16::splat(10);
806808
let r = sse41::_mm_cvtepi8_epi16(a);
@@ -842,7 +844,7 @@ mod tests {
842844
let r = sse41::_mm_cvtepi16_epi32(a);
843845
let e = i32x4::splat(10);
844846
assert_eq!(r, e);
845-
let a = i16x8::splat(-10);
847+
let a = i16x8::splat(-10);
846848
let r = sse41::_mm_cvtepi16_epi32(a);
847849
let e = i32x4::splat(-10);
848850
assert_eq!(r, e);
@@ -854,7 +856,7 @@ mod tests {
854856
let r = sse41::_mm_cvtepi16_epi64(a);
855857
let e = i64x2::splat(10);
856858
assert_eq!(r, e);
857-
let a = i16x8::splat(-10);
859+
let a = i16x8::splat(-10);
858860
let r = sse41::_mm_cvtepi16_epi64(a);
859861
let e = i64x2::splat(-10);
860862
assert_eq!(r, e);

0 commit comments

Comments
 (0)