
Commit 05b5e01

Finish porting code to "SIMD groundwork part 1"
Finish porting the SIMD code to rust-lang/rust#27169
1 parent 9e5a416 commit 05b5e01

File tree

5 files changed (+199 -110 lines)


src/blake2.rs (+6 -6)
@@ -106,9 +106,9 @@ macro_rules! blake2_impl {
         const IV: [$word; 8] = $IV;
 
         #[inline(always)]
-        fn iv0() -> $vec { $vec(IV[0], IV[1], IV[2], IV[3]) }
+        fn iv0() -> $vec { $vec::new(IV[0], IV[1], IV[2], IV[3]) }
         #[inline(always)]
-        fn iv1() -> $vec { $vec(IV[4], IV[5], IV[6], IV[7]) }
+        fn iv1() -> $vec { $vec::new(IV[4], IV[5], IV[6], IV[7]) }
 
         /// Convenience function for all-in-one computation.
         pub fn $func(nn: usize, k: &[u8], data: &[u8]) -> $result {
@@ -129,7 +129,7 @@ macro_rules! blake2_impl {
            let p0 = 0x01010000 ^ ((kk as $word) << 8) ^ (nn as $word);
            let mut state = $state {
                m: [0; 16],
-               h: [iv0() ^ $vec(p0, 0, 0, 0), iv1()],
+               h: [iv0() ^ $vec::new(p0, 0, 0, 0), iv1()],
                t: 0,
                nn: nn,
            };
@@ -145,8 +145,8 @@ macro_rules! blake2_impl {
        pub fn with_parameter_block(p: &[$word; 8]) -> Self {
            $state {
                m: [0; 16],
-               h: [iv0() ^ $vec(p[0], p[1], p[2], p[3]),
-                   iv1() ^ $vec(p[4], p[5], p[6], p[7])],
+               h: [iv0() ^ $vec::new(p[0], p[1], p[2], p[3]),
+                   iv1() ^ $vec::new(p[4], p[5], p[6], p[7])],
                t: 0,
                nn: p[0] as u8 as usize,
            }
@@ -263,7 +263,7 @@ macro_rules! blake2_impl {
            h[0],
            h[1],
            iv0(),
-           iv1() ^ $vec(t0, t1, f0, f1),
+           iv1() ^ $vec::new(t0, t1, f0, f1),
        ];
 
        $state::round(&mut v, m, &SIGMA[0]);
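
Every change in this file swaps the tuple-struct call `$vec(...)` for an explicit `$vec::new(...)`. The vector types come from src/simdty.rs, which is among the five changed files but is not shown in this capture; one plausible reason for the switch is that the concrete names become type aliases of a generic SIMD struct, and a type alias cannot be called like a tuple-struct constructor. A hypothetical sketch of that pattern (an assumption, not the actual file contents):

// Hypothetical sketch only; the real src/simdty.rs is not in this capture.
// A tuple struct with an inline `new` gives the macro a single construction
// syntax whether or not the type is compiled as a SIMD vector.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "simd", repr(simd))]
pub struct Simd4<T>(pub T, pub T, pub T, pub T);

impl<T> Simd4<T> {
    #[inline(always)]
    pub fn new(e0: T, e1: T, e2: T, e3: T) -> Simd4<T> {
        Simd4(e0, e1, e2, e3)
    }
}

pub type u32x4 = Simd4<u32>;
pub type u64x4 = Simd4<u64>;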

src/lib.rs (+2 -0)
@@ -37,7 +37,9 @@ extern crate constant_time_eq;
 
 mod as_bytes;
 mod bytes;
+
 mod simdty;
+#[cfg(feature = "simd")] mod simdint;
 mod simd;
 
 #[macro_use]

src/simd.rs (+88 -81)
@@ -24,33 +24,8 @@
 // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 // DEALINGS IN THE SOFTWARE.
 
-#[cfg(feature = "simd_opt")]
-use std::mem::transmute;
-
 pub use simdty::{u32x4, u64x4};
-
-#[cfg(feature = "simd")]
-extern "platform-intrinsic" {
-    fn simd_add<T>(x: T, y: T) -> T;
-    fn simd_shl<T>(x: T, y: T) -> T;
-    fn simd_shr<T>(x: T, y: T) -> T;
-    fn simd_xor<T>(x: T, y: T) -> T;
-}
-
-#[cfg(feature = "simd_opt")]
-extern "platform-intrinsic" {
-    fn simd_shuffle8<T, Elem>(v: T, w: T,
-                              i0: u32, i1: u32, i2: u32, i3: u32,
-                              i4: u32, i5: u32, i6: u32, i7: u32) -> T;
-
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    fn simd_shuffle16<T, Elem>(v: T, w: T,
-                               i0: u32, i1: u32, i2: u32, i3: u32,
-                               i4: u32, i5: u32, i6: u32, i7: u32,
-                               i8: u32, i9: u32, i10: u32, i11: u32,
-                               i12: u32, i13: u32, i14: u32, i15: u32,
-                              ) -> T;
-}
+#[cfg(feature = "simd")] use simdint;
 
 use std::ops::{Add, BitXor, Shl, Shr};
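
The intrinsic declarations removed above now live in the new src/simdint.rs, which src/lib.rs pulls in behind the "simd" feature; that file is one of the five changed in this commit but is not shown in this capture. Judging from the removed block and from the simdint::simd_* calls in the rest of this diff, it plausibly looks like the sketch below. The simd_shuffle2/simd_shuffle4 signatures, and the separate return-type parameter, are inferred from how the calls are used later and are an assumption:

// Plausible reconstruction of src/simdint.rs, an assumption rather than the
// actual file. Needs the nightly platform_intrinsics feature gate.
extern "platform-intrinsic" {
    pub fn simd_add<T>(x: T, y: T) -> T;
    pub fn simd_shl<T>(x: T, y: T) -> T;
    pub fn simd_shr<T>(x: T, y: T) -> T;
    pub fn simd_xor<T>(x: T, y: T) -> T;

    // Shuffles select lanes from the concatenation of the two inputs; the
    // output vector type (and lane count) may differ from the input's.
    pub fn simd_shuffle2<T, U>(v: T, w: T, i0: u32, i1: u32) -> U;
    pub fn simd_shuffle4<T, U>(v: T, w: T,
                               i0: u32, i1: u32, i2: u32, i3: u32) -> U;
    pub fn simd_shuffle8<T, U>(v: T, w: T,
                               i0: u32, i1: u32, i2: u32, i3: u32,
                               i4: u32, i5: u32, i6: u32, i7: u32) -> U;
    pub fn simd_shuffle16<T, U>(v: T, w: T,
                                i0: u32, i1: u32, i2: u32, i3: u32,
                                i4: u32, i5: u32, i6: u32, i7: u32,
                                i8: u32, i9: u32, i10: u32, i11: u32,
                                i12: u32, i13: u32, i14: u32, i15: u32) -> U;
}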

@@ -62,16 +37,16 @@ macro_rules! impl_ops {
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
-               unsafe { simd_add(self, rhs) }
+               unsafe { simdint::simd_add(self, rhs) }
            }
 
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
-               $vec(self.0.wrapping_add(rhs.0),
-                    self.1.wrapping_add(rhs.1),
-                    self.2.wrapping_add(rhs.2),
-                    self.3.wrapping_add(rhs.3))
+               $vec::new(self.0.wrapping_add(rhs.0),
+                         self.1.wrapping_add(rhs.1),
+                         self.2.wrapping_add(rhs.2),
+                         self.3.wrapping_add(rhs.3))
            }
        }
 
@@ -81,16 +56,16 @@ macro_rules! impl_ops {
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
-               unsafe { simd_xor(self, rhs) }
+               unsafe { simdint::simd_xor(self, rhs) }
            }
 
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
-               $vec(self.0 ^ rhs.0,
-                    self.1 ^ rhs.1,
-                    self.2 ^ rhs.2,
-                    self.3 ^ rhs.3)
+               $vec::new(self.0 ^ rhs.0,
+                         self.1 ^ rhs.1,
+                         self.2 ^ rhs.2,
+                         self.3 ^ rhs.3)
            }
        }
 
@@ -100,16 +75,16 @@ macro_rules! impl_ops {
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
-               unsafe { simd_shl(self, rhs) }
+               unsafe { simdint::simd_shl(self, rhs) }
            }
 
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
-               $vec(self.0 << rhs.0,
-                    self.1 << rhs.1,
-                    self.2 << rhs.2,
-                    self.3 << rhs.3)
+               $vec::new(self.0 << rhs.0,
+                         self.1 << rhs.1,
+                         self.2 << rhs.2,
+                         self.3 << rhs.3)
            }
        }
 
@@ -119,16 +94,16 @@ macro_rules! impl_ops {
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
-               unsafe { simd_shr(self, rhs) }
+               unsafe { simdint::simd_shr(self, rhs) }
            }
 
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
-               $vec(self.0 >> rhs.0,
-                    self.1 >> rhs.1,
-                    self.2 >> rhs.2,
-                    self.3 >> rhs.3)
+               $vec::new(self.0 >> rhs.0,
+                         self.1 >> rhs.1,
+                         self.2 >> rhs.2,
+                         self.3 >> rhs.3)
            }
        }
    }
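
Every operator in impl_ops! now has two bodies that must agree: the "simd" path defers to the simdint intrinsics, while the fallback applies the operation lane by lane through $vec::new. A hypothetical unit test (not part of this commit) makes the intended wrapping, element-wise semantics concrete:

// Hypothetical test inside src/simd.rs, assuming the u32x4::new constructor
// introduced by this diff and crate-visible lane fields.
#[test]
fn add_is_element_wise_and_wrapping() {
    let a = u32x4::new(1, 2, 3, u32::max_value());
    let b = u32x4::new(10, 20, 30, 1);
    let sum = a + b;
    // Both cfg paths produce the same result; the last lane wraps to 0.
    assert_eq!((sum.0, sum.1, sum.2, sum.3), (11, 22, 33, 0));
}
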
@@ -163,7 +138,7 @@ macro_rules! impl_vector4_common {
            #[inline(always)]
            fn gather(src: &[$word], i0: usize, i1: usize,
                      i2: usize, i3: usize) -> Self {
-               $vec(src[i0], src[i1], src[i2], src[i3])
+               $vec::new(src[i0], src[i1], src[i2], src[i3])
            }
 
            #[cfg(target_endian = "little")]
@@ -173,10 +148,10 @@ macro_rules! impl_vector4_common {
            #[cfg(not(target_endian = "little"))]
            #[inline(always)]
            fn from_le(self) -> Self {
-               $vec($word::from_le(self.0),
-                    $word::from_le(self.1),
-                    $word::from_le(self.2),
-                    $word::from_le(self.3))
+               $vec::new($word::from_le(self.0),
+                         $word::from_le(self.1),
+                         $word::from_le(self.2),
+                         $word::from_le(self.3))
            }
 
            #[cfg(target_endian = "little")]
@@ -186,10 +161,10 @@ macro_rules! impl_vector4_common {
            #[cfg(not(target_endian = "little"))]
            #[inline(always)]
            fn to_le(self) -> Self {
-               $vec(self.0.to_le(),
-                    self.1.to_le(),
-                    self.2.to_le(),
-                    self.3.to_le())
+               $vec::new(self.0.to_le(),
+                         self.1.to_le(),
+                         self.2.to_le(),
+                         self.3.to_le())
            }
 
            #[inline(always)]
@@ -201,31 +176,52 @@ macro_rules! impl_vector4_common {
                let r = n as $word;
                let l = $bits - r;
 
-               (self >> $vec(r, r, r, r)) ^ (self << $vec(l, l, l, l))
+               (self >> $vec::new(r, r, r, r)) ^ (self << $vec::new(l, l, l, l))
            }
 
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn rotate_right_any(self, n: u32) -> Self {
-               $vec(self.0.rotate_right(n),
-                    self.1.rotate_right(n),
-                    self.2.rotate_right(n),
-                    self.3.rotate_right(n))
+               $vec::new(self.0.rotate_right(n),
+                         self.1.rotate_right(n),
+                         self.2.rotate_right(n),
+                         self.3.rotate_right(n))
+           }
+
+           #[cfg(feature = "simd")]
+           #[inline(always)]
+           fn shuffle_left_1(self) -> Self {
+               unsafe { simdint::simd_shuffle4(self, self, 1, 2, 3, 0) }
            }
 
+           #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shuffle_left_1(self) -> Self {
-               $vec(self.1, self.2, self.3, self.0)
+               $vec::new(self.1, self.2, self.3, self.0)
+           }
+
+           #[cfg(feature = "simd")]
+           #[inline(always)]
+           fn shuffle_left_2(self) -> Self {
+               unsafe { simdint::simd_shuffle4(self, self, 2, 3, 0, 1) }
            }
 
+           #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shuffle_left_2(self) -> Self {
-               $vec(self.2, self.3, self.0, self.1)
+               $vec::new(self.2, self.3, self.0, self.1)
+           }
+
+           #[cfg(feature = "simd")]
+           #[inline(always)]
+           fn shuffle_left_3(self) -> Self {
+               unsafe { simdint::simd_shuffle4(self, self, 3, 0, 1, 2) }
            }
 
+           #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shuffle_left_3(self) -> Self {
-               $vec(self.3, self.0, self.1, self.2)
+               $vec::new(self.3, self.0, self.1, self.2)
            }
        }
    }
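
The new shuffle_left_N SIMD bodies and the scalar fallbacks below them are meant to be equivalent: the shuffle intrinsic's index list picks lanes out of the two inputs laid end to end, so with the same vector passed twice, indices (1, 2, 3, 0) produce (self.1, self.2, self.3, self.0), exactly what the fallback builds by hand. A hypothetical check (not part of this commit):

// Hypothetical test inside src/simd.rs; assumes the Vector4 trait methods in
// the hunk above and the u32x4::new constructor from this diff.
#[test]
fn shuffle_left_1_rotates_lanes() {
    let v = u32x4::new(10, 20, 30, 40);
    let r = v.shuffle_left_1();
    // simd path: simd_shuffle4(v, v, 1, 2, 3, 0); scalar path: (v.1, v.2, v.3, v.0).
    assert_eq!((r.0, r.1, r.2, r.3), (20, 30, 40, 10));
}
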
@@ -236,13 +232,16 @@ macro_rules! impl_vector4_common {
 #[inline(always)]
 fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
     use simdty::u16x8;
+    use std::mem::transmute;
+
     unsafe {
         let tmp: u16x8 = transmute(vec);
-        transmute(simd_shuffle8::<u16x8, u16>(tmp, tmp,
-                                              1, 0,
-                                              3, 2,
-                                              5, 4,
-                                              7, 6))
+        let tmp: u16x8 = simdint::simd_shuffle8(tmp, tmp,
+                                                1, 0,
+                                                3, 2,
+                                                5, 4,
+                                                7, 6);
+        transmute(tmp)
     }
 }
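
The body change here is mechanical (the intrinsic now comes through simdint, and the shuffle result gets its own binding before the transmute back), but the trick it preserves is worth spelling out: rotating a 32-bit lane by 16 bits is the same as swapping its two 16-bit halves, which the (1, 0, 3, 2, 5, 4, 7, 6) shuffle does for all four lanes at once. A hypothetical scalar check of that identity:

// Hypothetical test (not in this commit): per 32-bit word, swapping the
// 16-bit halves equals a 16-bit rotation, which is what the u16x8 shuffle
// above does to every lane of the vector.
#[test]
fn rotate_right_16_is_a_half_swap() {
    let x: u32 = 0x1234_5678;
    let swapped = ((x & 0xFFFF) << 16) | (x >> 16);
    assert_eq!(swapped, x.rotate_right(16)); // both are 0x5678_1234
}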

@@ -268,13 +267,16 @@ impl Vector4<u32> for u32x4 {
 #[inline(always)]
 fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
     use simdty::u32x8;
+    use std::mem::transmute;
+
     unsafe {
         let tmp: u32x8 = transmute(vec);
-        transmute(simd_shuffle8::<u32x8, u32>(tmp, tmp,
-                                              1, 0,
-                                              3, 2,
-                                              5, 4,
-                                              7, 6))
+        let tmp: u32x8 = simdint::simd_shuffle8(tmp, tmp,
+                                                1, 0,
+                                                3, 2,
+                                                5, 4,
+                                                7, 6);
+        transmute(tmp)
     }
 }
 
@@ -283,13 +285,16 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
 #[inline(always)]
 fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
     use simdty::u16x16;
+    use std::mem::transmute;
+
     unsafe {
         let tmp: u16x16 = transmute(vec);
-        transmute(simd_shuffle16::<u16x16, u16>(tmp, tmp,
-                                                1, 2, 3, 0,
-                                                5, 6, 7, 4,
-                                                9, 10, 11, 8,
-                                                13, 14, 15, 12))
+        let tmp: u16x16 = simdint::simd_shuffle16(tmp, tmp,
+                                                  1, 2, 3, 0,
+                                                  5, 6, 7, 4,
+                                                  9, 10, 11, 8,
+                                                  13, 14, 15, 12);
+        transmute(tmp)
     }
 }
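
Here each 64-bit lane is viewed as four 16-bit words (lowest word first on little-endian targets), and the (1, 2, 3, 0) pattern applied within each lane rotates the whole 64-bit value right by 16 bits. A hypothetical scalar check of that mapping:

// Hypothetical test (not in this commit): per 64-bit lane, reading the
// 16-bit words in the order (w1, w2, w3, w0) instead of (w0, w1, w2, w3),
// as the (1, 2, 3, 0) shuffle above does, is a right rotation by 16 bits.
#[test]
fn u64_rotate_right_16_is_a_word_rotation() {
    let x: u64 = 0x1111_2222_3333_4444;
    let rotated = (x >> 16) | (x << 48);
    assert_eq!(rotated, x.rotate_right(16)); // both are 0x4444_1111_2222_3333
}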

@@ -314,9 +319,11 @@ fn vext_u64_u8(a: u64x2, b: u8) -> u64x2 {
 #[cfg(target_arch = "arm")]
 #[inline(always)]
 fn u64x4_rotate_right_u8(vec: u64x4, n: u8) -> u64x4 {
-    let tmp0 = vext_u64_u8(u64x2(vec.0, vec.1), n);
-    let tmp1 = vext_u64_u8(u64x2(vec.2, vec.3), n);
-    u64x4(tmp0.0, tmp0.1, tmp1.0, tmp1.1)
+    unsafe {
+        let tmp0 = vext_u64_u8(simdint::simd_shuffle2(vec, vec, 0, 1), n);
+        let tmp1 = vext_u64_u8(simdint::simd_shuffle2(vec, vec, 2, 3), n);
+        simdint::simd_shuffle4(tmp0, tmp1, 0, 1, 2, 3)
+    }
 }
 
 impl Vector4<u64> for u64x4 {
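
The ARM-specific helper no longer builds u64x2 values from scalar fields (u64x2(vec.0, vec.1)) and no longer reassembles the result element by element; it now splits and recombines lanes with shuffle intrinsics, staying in vector form throughout. An annotated reading of the new body (the comments are mine; vext_u64_u8 appears only by signature in this capture and presumably wraps NEON's VEXT):

// Same code as the hunk above, with explanatory comments added.
#[cfg(target_arch = "arm")]
#[inline(always)]
fn u64x4_rotate_right_u8(vec: u64x4, n: u8) -> u64x4 {
    unsafe {
        // Split the four u64 lanes into two u64x2 halves via lane shuffles
        // instead of reading scalar fields.
        let lo = simdint::simd_shuffle2(vec, vec, 0, 1); // lanes 0, 1
        let hi = simdint::simd_shuffle2(vec, vec, 2, 3); // lanes 2, 3
        // Rotate each half right by n bytes.
        let tmp0 = vext_u64_u8(lo, n);
        let tmp1 = vext_u64_u8(hi, n);
        // Concatenate the two halves back into a u64x4.
        simdint::simd_shuffle4(tmp0, tmp1, 0, 1, 2, 3)
    }
}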
