27
27
#[ cfg( feature = "simd_opt" ) ]
28
28
use std:: mem:: transmute;
29
29
30
- #[ cfg( feature = "simd" ) ]
31
30
pub use simdty:: { u32x4, u64x4} ;
32
31
33
- #[ cfg( not( feature = "simd" ) ) ]
34
- #[ derive( Clone , Copy , Debug ) ]
35
- #[ repr( C ) ]
36
- pub struct u32x4 ( pub u32 , pub u32 , pub u32 , pub u32 ) ;
32
// Lane-wise vector arithmetic intrinsics supplied by the compiler when the
// "simd" feature is enabled.
// NOTE(review): `extern "platform-intrinsic"` requires a nightly compiler;
// these declarations only exist behind the "simd" feature gate.
#[cfg(feature = "simd")]
extern "platform-intrinsic" {
    // Lane-wise addition (the scalar fallback below uses wrapping semantics).
    fn simd_add<T>(x: T, y: T) -> T;
    // Lane-wise left shift.
    fn simd_shl<T>(x: T, y: T) -> T;
    // Lane-wise right shift.
    fn simd_shr<T>(x: T, y: T) -> T;
    // Lane-wise bitwise XOR.
    fn simd_xor<T>(x: T, y: T) -> T;
}
37
39
38
- #[ cfg( not( feature = "simd" ) ) ]
39
- #[ derive( Clone , Copy , Debug ) ]
40
- #[ repr( C ) ]
41
- pub struct u64x4 ( pub u64 , pub u64 , pub u64 , pub u64 ) ;
40
// Vector lane-shuffle intrinsics, used by the optimized rotate helpers
// further down; only declared when the "simd_opt" feature is enabled.
#[cfg(feature = "simd_opt")]
extern "platform-intrinsic" {
    // Builds an 8-lane vector by selecting lanes i0..i7 — presumably from
    // the concatenation of `v` and `w`, matching rustc's simd_shuffle
    // intrinsic family (callers here pass the same vector twice and
    // indices < 8, so only `v` is ever read) — TODO confirm.
    fn simd_shuffle8<T, Elem>(v: T, w: T,
                              i0: u32, i1: u32, i2: u32, i3: u32,
                              i4: u32, i5: u32, i6: u32, i7: u32) -> T;

    // 16-lane variant; only declared on x86/x86_64.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn simd_shuffle16<T, Elem>(v: T, w: T,
                               i0: u32, i1: u32, i2: u32, i3: u32,
                               i4: u32, i5: u32, i6: u32, i7: u32,
                               i8: u32, i9: u32, i10: u32, i11: u32,
                               i12: u32, i13: u32, i14: u32, i15: u32,
                               ) -> T;
}
45
54
46
use std::ops::{Add, BitXor, Shl, Shr};

/// Implements the `Add`, `BitXor`, `Shl`, and `Shr` operator traits for a
/// 4-lane vector tuple type (`$vec`). Each operator carries two bodies
/// selected at compile time: a `feature = "simd"` version that defers to
/// the platform intrinsics declared above, and a scalar fallback that
/// applies the operation to each of the four lanes (`.0`..`.3`) in turn.
macro_rules! impl_ops {
    ($vec:ident) => {
        impl Add for $vec {
            type Output = Self;

            // SIMD path: one vector add.
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
                unsafe { simd_add(self, rhs) }
            }

            // Scalar fallback: lane-wise wrapping addition.
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
                $vec(self.0.wrapping_add(rhs.0),
                     self.1.wrapping_add(rhs.1),
                     self.2.wrapping_add(rhs.2),
                     self.3.wrapping_add(rhs.3))
            }
        }

        impl BitXor for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                unsafe { simd_xor(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                $vec(self.0 ^ rhs.0,
                     self.1 ^ rhs.1,
                     self.2 ^ rhs.2,
                     self.3 ^ rhs.3)
            }
        }

        // Shift amounts are passed as a full vector (one amount per lane),
        // mirroring the intrinsics' lane-wise signatures.
        impl Shl<$vec> for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
                unsafe { simd_shl(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
                $vec(self.0 << rhs.0,
                     self.1 << rhs.1,
                     self.2 << rhs.2,
                     self.3 << rhs.3)
            }
        }

        impl Shr<$vec> for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
                unsafe { simd_shr(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
                $vec(self.0 >> rhs.0,
                     self.1 >> rhs.1,
                     self.2 >> rhs.2,
                     self.3 >> rhs.3)
            }
        }
    }
}
62
136
63
// Instantiate the operator impls for the two 4-lane vector types
// re-exported at the top of this module.
impl_ops!(u32x4);
impl_ops!(u64x4);
65
139
66
140
pub trait Vector4 < T > : Copy {
67
141
fn gather ( src : & [ T ] , i0 : usize , i1 : usize , i2 : usize , i3 : usize ) -> Self ;
@@ -118,26 +192,16 @@ macro_rules! impl_vector4_common {
118
192
self . 3 . to_le( ) )
119
193
}
120
194
121
- #[ cfg( feature = "simd" ) ]
122
195
#[ inline( always) ]
123
196
fn wrapping_add( self , rhs: Self ) -> Self { self + rhs }
124
197
125
- #[ cfg( not( feature = "simd" ) ) ]
126
- #[ inline( always) ]
127
- fn wrapping_add( self , rhs: Self ) -> Self {
128
- $vec( self . 0 . wrapping_add( rhs. 0 ) ,
129
- self . 1 . wrapping_add( rhs. 1 ) ,
130
- self . 2 . wrapping_add( rhs. 2 ) ,
131
- self . 3 . wrapping_add( rhs. 3 ) )
132
- }
133
-
134
198
#[ cfg( feature = "simd" ) ]
135
199
#[ inline( always) ]
136
200
fn rotate_right_any( self , n: u32 ) -> Self {
137
201
let r = n as $word;
138
202
let l = $bits - r;
139
203
140
- ( self >> $vec( r, r, r, r) ) | ( self << $vec( l, l, l, l) )
204
+ ( self >> $vec( r, r, r, r) ) ^ ( self << $vec( l, l, l, l) )
141
205
}
142
206
143
207
#[ cfg( not( feature = "simd" ) ) ]
@@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
174
238
use simdty:: u16x8;
175
239
unsafe {
176
240
let tmp: u16x8 = transmute ( vec) ;
177
- transmute ( u16x8 ( tmp. 1 , tmp. 0 ,
178
- tmp. 3 , tmp. 2 ,
179
- tmp. 5 , tmp. 4 ,
180
- tmp. 7 , tmp. 6 ) )
241
+ transmute ( simd_shuffle8 :: < u16x8 , u16 > ( tmp, tmp,
242
+ 1 , 0 ,
243
+ 3 , 2 ,
244
+ 5 , 4 ,
245
+ 7 , 6 ) )
181
246
}
182
247
}
183
248
@@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
205
270
use simdty:: u32x8;
206
271
unsafe {
207
272
let tmp: u32x8 = transmute ( vec) ;
208
- transmute ( u32x8 ( tmp. 1 , tmp. 0 ,
209
- tmp. 3 , tmp. 2 ,
210
- tmp. 5 , tmp. 4 ,
211
- tmp. 7 , tmp. 6 ) )
273
+ transmute ( simd_shuffle8 :: < u32x8 , u32 > ( tmp, tmp,
274
+ 1 , 0 ,
275
+ 3 , 2 ,
276
+ 5 , 4 ,
277
+ 7 , 6 ) )
212
278
}
213
279
}
214
280
@@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
219
285
use simdty:: u16x16;
220
286
unsafe {
221
287
let tmp: u16x16 = transmute ( vec) ;
222
- transmute ( u16x16 ( tmp. 1 , tmp. 2 , tmp. 3 , tmp. 0 ,
223
- tmp. 5 , tmp. 6 , tmp. 7 , tmp. 4 ,
224
- tmp. 9 , tmp. 10 , tmp. 11 , tmp. 8 ,
225
- tmp. 13 , tmp. 14 , tmp. 15 , tmp. 12 ) )
288
+ transmute ( simd_shuffle16 :: < u16x16 , u16 > ( tmp, tmp,
289
+ 1 , 2 , 3 , 0 ,
290
+ 5 , 6 , 7 , 4 ,
291
+ 9 , 10 , 11 , 8 ,
292
+ 13 , 14 , 15 , 12 ) )
226
293
}
227
294
}
228
295
0 commit comments