[runtime] fix tbm detection

gnzlbg · gnzlbg · commit 39c5b77b6898 · 2017-11-07T00:09:39.000+01:00
diff --git a/src/lib.rs b/src/lib.rs
@@ -184,9 +184,7 @@ mod v16 {
     define_ty! { i8x2, i8, i8 }
     define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
 
-    define_casts!(
-        (i8x2, i64x2, as_i64x2)
-    );
+    define_casts!((i8x2, i64x2, as_i64x2));
 }
 
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
diff --git a/src/x86/runtime.rs b/src/x86/runtime.rs
@@ -211,7 +211,39 @@ fn detect_features() -> usize {
         value = set_bit(value, __Feature::sse4_2 as u32);
     }
     if test_bit(proc_info_ecx, 21) {
-        value = set_bit(value, __Feature::tbm as u32);
+        // The TBM bit is only meaningfully set in AMD CPUs, so it is only
+        // reported when the CPUID vendor string is "AuthenticAMD":
+        let _max_leaf: u32;
+        let ebx: u32;
+        let edx: u32;
+        let ecx: u32;
+        // EAX = 0: Basic Information. The vendor ID is stored in 12 u8 ascii
+        // chars, returned in EBX, EDX, and ECX (in that order). CPUID also
+        // overwrites EAX (highest basic leaf), so EAX is declared as an
+        // output even though its value is unused:
+        unsafe {
+            asm!("cpuid"
+                 : "={eax}"(_max_leaf), "={ebx}"(ebx),
+                   "={ecx}"(ecx), "={edx}"(edx)
+                 : "{eax}"(0x0000_0000_u32), "{ecx}"(0 as u32)
+                 : :);
+        }
+        // On little-endian x86, byte 0 of each register holds its first
+        // character:
+        let ebx: [u8; 4] = unsafe { ::std::mem::transmute(ebx) };
+        let edx: [u8; 4] = unsafe { ::std::mem::transmute(edx) };
+        let ecx: [u8; 4] = unsafe { ::std::mem::transmute(ecx) };
+        // Concatenate as EBX, EDX, ECX: "Auth" + "enti" + "cAMD".
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let vendor_id = [
+            ebx[0], ebx[1], ebx[2], ebx[3],
+            edx[0], edx[1], edx[2], edx[3],
+            ecx[0], ecx[1], ecx[2], ecx[3],
+        ];
+        let vendor_id_amd = b"AuthenticAMD";
+        if vendor_id == *vendor_id_amd {
+            value = set_bit(value, __Feature::tbm as u32);
+        }
     }
     if test_bit(proc_info_ecx, 23) {
         value = set_bit(value, __Feature::popcnt as u32);
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
-pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
+pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
+    _mm_cvtsd_si64(a)
+}
 
 /// Convert the lower double-precision (64-bit) floating-point element in `b`
 /// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
-pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
+pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
+    _mm_cvttsd_si64(a)
+}
 
 /// Convert packed single-precision (32-bit) floating-point elements in `a` to
 /// packed 32-bit integers with truncation.
diff --git a/src/x86/sse41.rs b/src/x86/sse41.rs
@@ -244,7 +244,8 @@ pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
     pmaxud(a, b)
 }
 
-/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers using unsigned saturation
+/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// using unsigned saturation
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 #[cfg_attr(test, assert_instr(packusdw))]
@@ -276,7 +277,8 @@ pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
     simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
 }
 
-/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed 64-bit integers
+/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
+/// 64-bit integers
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 #[cfg_attr(test, assert_instr(pmovsxbq))]
@@ -800,7 +802,7 @@ mod tests {
         assert_eq!(r, e);
     }
 
-     #[simd_test = "sse4.1"]
+    #[simd_test = "sse4.1"]
     unsafe fn _mm_cvtepi8_epi16() {
         let a = i8x16::splat(10);
         let r = sse41::_mm_cvtepi8_epi16(a);
@@ -842,7 +844,7 @@ mod tests {
         let r = sse41::_mm_cvtepi16_epi32(a);
         let e = i32x4::splat(10);
         assert_eq!(r, e);
-        let a =  i16x8::splat(-10);
+        let a = i16x8::splat(-10);
         let r = sse41::_mm_cvtepi16_epi32(a);
         let e = i32x4::splat(-10);
         assert_eq!(r, e);
@@ -854,7 +856,7 @@ mod tests {
         let r = sse41::_mm_cvtepi16_epi64(a);
         let e = i64x2::splat(10);
         assert_eq!(r, e);
-        let a =  i16x8::splat(-10);
+        let a = i16x8::splat(-10);
         let r = sse41::_mm_cvtepi16_epi64(a);
         let e = i64x2::splat(-10);
         assert_eq!(r, e);

Original file line number	Diff line number	Diff line change
`@@ -184,9 +184,7 @@ mod v16 {`
`184`	`184`	`define_ty! { i8x2, i8, i8 }`
`185`	`185`	`define_impl! { i8x2, i8, 2, i8x2, x0, x1 }`
`186`	`186`
`187`		`- define_casts!(`
`188`		`- (i8x2, i64x2, as_i64x2)`
`189`		`- );`
	`187`	`+ define_casts!((i8x2, i64x2, as_i64x2));`
`190`	`188`	`}`
`191`	`189`
`192`	`190`	`#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]`