From ef44f34be0092d44ccf449f593937b6d2e0931ce Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Fri, 20 Aug 2021 17:48:06 +0800 Subject: [PATCH 1/7] add vldx neon instructions --- .../core_arch/src/aarch64/neon/generated.rs | 126 + crates/core_arch/src/aarch64/neon/mod.rs | 50 +- .../src/arm_shared/neon/generated.rs | 14982 +++++++++------- crates/core_arch/src/arm_shared/neon/mod.rs | 250 + crates/stdarch-gen/neon.spec | 75 + crates/stdarch-gen/src/main.rs | 476 +- crates/stdarch-verify/src/lib.rs | 54 + 7 files changed, 9100 insertions(+), 6913 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 28ea2592a0..886e47fcf5 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -4514,6 +4514,84 @@ pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { vqaddd_s64_(a, b) } +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64")] + fn vld1_f64_x2_(a: *const f64) -> float64x1x2_t; + } + vld1_f64_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64")] + fn vld1q_f64_x2_(a: *const f64) -> float64x2x2_t; + } + vld1q_f64_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64")] + fn vld1_f64_x3_(a: *const f64) -> float64x1x3_t; + } + vld1_f64_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64")] + fn vld1q_f64_x3_(a: *const f64) -> float64x2x3_t; + } + vld1q_f64_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64")] + fn vld1_f64_x4_(a: *const f64) -> float64x1x4_t; + } + vld1_f64_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64")] + fn vld1q_f64_x4_(a: 
*const f64) -> float64x2x4_t; + } + vld1q_f64_x4_(a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -12857,6 +12935,54 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f64_x2() { + let a: [f64; 3] = [0., 1., 2.]; + let e: [f64; 2] = [1., 2.]; + let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f64_x2() { + let a: [f64; 5] = [0., 1., 2., 3., 4.]; + let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)]; + let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f64_x3() { + let a: [f64; 4] = [0., 1., 2., 3.]; + let e: [f64; 3] = [1., 2., 3.]; + let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f64_x3() { + let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)]; + let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f64_x4() { + let a: [f64; 5] = [0., 1., 2., 3., 4.]; + let e: [f64; 4] = [1., 2., 3., 4.]; + let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f64_x4() { + let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)]; + let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 95aea69ef7..09f67edaaa 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -25,44 +25,34 @@ types! { pub struct float64x2_t(f64, f64); } -/// ARM-specific type containing two `int8x16_t` vectors. +/// ARM-specific type containing two `float64x1_t` vectors. #[derive(Copy, Clone)] -pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); -/// ARM-specific type containing three `int8x16_t` vectors. +pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t); +/// ARM-specific type containing three `float64x1_t` vectors. #[derive(Copy, Clone)] -pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); -/// ARM-specific type containing four `int8x16_t` vectors. +pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t); +/// ARM-specific type containing four `float64x1_t` vectors. #[derive(Copy, Clone)] -pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); - -/// ARM-specific type containing two `uint8x16_t` vectors. -#[derive(Copy, Clone)] -pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); -/// ARM-specific type containing three `uint8x16_t` vectors. -#[derive(Copy, Clone)] -pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); -/// ARM-specific type containing four `uint8x16_t` vectors. -#[derive(Copy, Clone)] -pub struct uint8x16x4_t( - pub uint8x16_t, - pub uint8x16_t, - pub uint8x16_t, - pub uint8x16_t, +pub struct float64x1x4_t( + pub float64x1_t, + pub float64x1_t, + pub float64x1_t, + pub float64x1_t, ); -/// ARM-specific type containing two `poly8x16_t` vectors. 
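// Note on the two mod.rs hunks above and below: the q-register struct types
// removed here (int8x16x2_t, uint8x16x2_t, poly8x16x2_t and their x3/x4 forms)
// are not dropped from the crate. Judging by the diffstat above
// (crates/core_arch/src/arm_shared/neon/mod.rs grows by 250 lines), they move
// to arm_shared so both the ARM and AArch64 halves can share them; this
// aarch64 module now defines only the float64x1/float64x2 tuple types, which
// exist solely on AArch64.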
+/// ARM-specific type containing two `float64x2_t` vectors. #[derive(Copy, Clone)] -pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); -/// ARM-specific type containing three `poly8x16_t` vectors. +pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t); +/// ARM-specific type containing three `float64x2_t` vectors. #[derive(Copy, Clone)] -pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); -/// ARM-specific type containing four `poly8x16_t` vectors. +pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t); +/// ARM-specific type containing four `float64x2_t` vectors. #[derive(Copy, Clone)] -pub struct poly8x16x4_t( - pub poly8x16_t, - pub poly8x16_t, - pub poly8x16_t, - pub poly8x16_t, +pub struct float64x2x4_t( + pub float64x2_t, + pub float64x2_t, + pub float64x2_t, + pub float64x2_t, ); #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 7dc5b53e10..28bf79bd87 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -5858,4818 +5858,5128 @@ pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { vqaddq_s64_(a, b) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0i8")] + fn vld1_s8_x2_(a: *const i8) -> int8x8x2_t; + } +vld1_s8_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0i16")] + fn vld1_s16_x2_(a: *const i16) -> int16x4x2_t; + } +vld1_s16_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0i32")] + fn vld1_s32_x2_(a: *const i32) -> int32x2x2_t; + } +vld1_s32_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0i64")] + fn vld1_s64_x2_(a: *const i64) -> int64x1x2_t; + } +vld1_s64_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0i8")] + fn vld1q_s8_x2_(a: *const i8) -> int8x16x2_t; + } +vld1q_s8_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0i16")] + fn vld1q_s16_x2_(a: *const i16) -> int16x8x2_t; + } +vld1q_s16_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub 
unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0i32")] + fn vld1q_s32_x2_(a: *const i32) -> int32x4x2_t; + } +vld1q_s32_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0i64")] + fn vld1q_s64_x2_(a: *const i64) -> int64x2x2_t; + } +vld1q_s64_x2_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8")] + fn vld1_s8_x3_(a: *const i8) -> int8x8x3_t; + } +vld1_s8_x3_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16")] + fn vld1_s16_x3_(a: *const i16) -> int16x4x3_t; + } +vld1_s16_x3_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test,
target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32")] + fn vld1_s32_x3_(a: *const i32) -> int32x2x3_t; + } +vld1_s32_x3_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64")] + fn vld1_s64_x3_(a: *const i64) -> int64x1x3_t; + } +vld1_s64_x3_(a) } -/// Polynomial multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] -pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")] - fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8")] + fn vld1q_s8_x3_(a: *const i8) -> int8x16x3_t; } -vmul_p8_(a, b) +vld1q_s8_x3_(a) } -/// Polynomial multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] -pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.pmul.v16i8")] - fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16")] + fn vld1q_s16_x3_(a: *const i16) -> int16x8x3_t; } -vmulq_p8_(a, b) +vld1q_s16_x3_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] -pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32")] + fn vld1q_s32_x3_(a: *const i32) -> int32x4x3_t; + } +vld1q_s32_x3_(a) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] -pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64")] + fn vld1q_s64_x3_(a: *const i64) -> int64x2x3_t; + } +vld1q_s64_x3_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - simd_mul(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8")] + fn vld1_s8_x4_(a: *const i8) -> int8x8x4_t; + } +vld1_s8_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_n_s16(a: int16x8_t,
b: i16) -> int16x8_t { - simd_mul(a, vdupq_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16")] + fn vld1_s16_x4_(a: *const i16) -> int16x4x4_t; + } +vld1_s16_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - simd_mul(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32")] + fn vld1_s32_x4_(a: *const i32) -> int32x2x4_t; + } +vld1_s32_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - simd_mul(a, vdupq_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64")] + fn vld1_s64_x4_(a: *const i64) -> int64x1x4_t; + } +vld1_s64_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { - simd_mul(a, vdup_n_u16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8")] + fn vld1q_s8_x4_(a: *const i8) -> int8x16x4_t; + } +vld1q_s8_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm",
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { - simd_mul(a, vdupq_n_u16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16")] + fn vld1q_s16_x4_(a: *const i16) -> int16x8x4_t; + } +vld1q_s16_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { - simd_mul(a, vdup_n_u32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32")] + fn vld1q_s32_x4_(a: *const i32) -> int32x4x4_t; + } +vld1q_s32_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] -pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { - simd_mul(a, vdupq_n_u32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64")] + fn vld1q_s64_x4_(a: *const i64) -> int64x2x4_t; + } +vld1q_s64_x4_(a) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] -pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { - simd_mul(a, vdup_n_f32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) } -/// Vector multiply by scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
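// A minimal usage sketch of the new API (not part of the patch; assumes an
// AArch64 target, where NEON is always available, and a hypothetical function
// name):
//
//     use core::arch::aarch64::*;
//
//     #[target_feature(enable = "neon")]
//     unsafe fn load_16_bytes(p: *const u8) -> uint8x8x2_t {
//         // A single LD1 {v0.8b, v1.8b} fills both halves of the pair from
//         // p[0..16], replacing two separate vld1_u8 calls.
//         vld1_u8_x2(p)
//     }
//
// Note how vld1_u8_x2 above is just a transmute of vld1_s8_x2: only the
// signed variants bind the LLVM intrinsics; the unsigned and poly wrappers
// reuse them.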
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] -pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { - simd_mul(a, vdupq_n_f32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + transmute(vld1_s32_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE 
as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + transmute(vld1q_s32_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) } -/// Multiply +/// Load 
multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + transmute(vld1_s32_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { - static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + transmute(vld1q_s32_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { - static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) } -/// Multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) } -/// Floating-point multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { 
- static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + transmute(vld1_s32_x4(transmute(a))) } -/// Floating-point multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) } -/// Floating-point multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { - static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) } -/// Floating-point multiply +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) } -/// Signed multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] -pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")] - fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t; - } -vmull_s8_(a, b) +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + transmute(vld1q_s32_x4(transmute(a))) } -/// Signed multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] -pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")] - fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; - } -vmull_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) } -/// Signed multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] -pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")] - fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; - } -vmull_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) } -/// Unsigned multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")] - fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - } -vmull_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) } -/// Unsigned multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vmullu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")] - fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - } -vmull_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) } -/// Unsigned multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")] - fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - } -vmull_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) } -/// Polynomial multiply long +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))] -pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")] - fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; - } -vmull_p8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) } -/// Vector long multiply with scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] -pub unsafe fn vmullh_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vmull_s16(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) } -/// Vector long multiply with scalar +/// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] -pub unsafe fn vmulls_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vmull_s32(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
+ transmute(vld1_s16_x2(transmute(a)))
}

-/// Vector long multiply with scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
-pub unsafe fn vmullh_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
- vmull_u16(a, vdup_n_u16(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
+ transmute(vld1_s16_x3(transmute(a)))
}

-/// Vector long multiply with scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
-pub unsafe fn vmulls_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
- vmull_u32(a, vdup_n_u32(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
+ transmute(vld1_s16_x4(transmute(a)))
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
- static_assert_imm2!(LANE);
- vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
+ transmute(vld1q_s16_x2(transmute(a)))
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
- static_assert_imm3!(LANE);
- vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
+ transmute(vld1q_s16_x3(transmute(a)))
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
- static_assert_imm1!(LANE);
- vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
+ transmute(vld1q_s16_x4(transmute(a)))
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
- static_assert_imm2!(LANE);
- vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0f32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0f32")]
+ fn vld1_f32_x2_(a: *const f32) -> float32x2x2_t;
+ }
+vld1_f32_x2_(a)
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
- static_assert_imm2!(LANE);
- vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0f32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0f32")]
+ fn vld1q_f32_x2_(a: *const f32) -> float32x4x2_t;
+ }
+vld1q_f32_x2_(a)
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
- static_assert_imm3!(LANE);
- vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0f32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0f32")]
+ fn vld1_f32_x3_(a: *const f32) -> float32x2x3_t;
+ }
+vld1_f32_x3_(a)
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
- static_assert_imm1!(LANE);
- vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0f32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0f32")]
+ fn vld1q_f32_x3_(a: *const f32) -> float32x4x3_t;
+ }
+vld1q_f32_x3_(a)
}

-/// Vector long multiply by scalar
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
- static_assert_imm2!(LANE);
- vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
-}
-
-/// Floating-point fused Multiply-Add to accumulator(vector)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
-pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
+pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
 #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
- fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0f32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0f32")]
+ fn vld1_f32_x4_(a: *const f32) -> float32x2x4_t;
 }
-vfma_f32_(b, c, a)
+vld1_f32_x4_(a)
}

-/// Floating-point fused Multiply-Add to accumulator(vector)
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
-pub unsafe fn vfmaq_f32(a: 
float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")] - fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0f32")] + fn vld1q_f32_x4_(a: *const f32) -> float32x4x4_t; } -vfmaq_f32_(b, c, a) +vld1q_f32_x4_(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfma_f32(a, b, vdup_n_f32(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_mul(a, b) } -/// Floating-point fused Multiply-Add to accumulator(vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmaq_f32(a, b, vdupq_n_f32(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_mul(a, b) } -/// Floating-point fused multiply-subtract from accumulator +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] -pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - let b: float32x2_t = simd_neg(b); - vfma_f32(a, b, c) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_mul(a, b) } -/// Floating-point fused multiply-subtract from accumulator +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] -pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - let b: float32x4_t = simd_neg(b); - 
vfmaq_f32(a, b, c) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_mul(a, b) } -/// Floating-point fused Multiply-subtract to accumulator(vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] -pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfms_f32(a, b, vdup_n_f32(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_mul(a, b) } -/// Floating-point fused Multiply-subtract to accumulator(vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] -pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmsq_f32(a, b, vdupq_n_f32(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_mul(a, b) } -/// 
Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_mul(a, b) } -/// Subtract +/// Polynomial multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] +pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")] + fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; + } +vmul_p8_(a, b) } -/// Subtract +/// Polynomial multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] +pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v16i8")] + fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; + } +vmulq_p8_(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] 
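// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the generated diff. It shows the intended
// call pattern for the vld1_*_x* loads added in this patch; the module and
// function names here are hypothetical, and it assumes an aarch64 build where
// these intrinsics are reachable through core::arch::aarch64, as in stdarch.
#[cfg(target_arch = "aarch64")]
mod vld1x_sketch {
    use core::arch::aarch64::*;

    // A single LD1 (multiple structures) fills a register pair from four
    // consecutive f32s, instead of issuing two separate vld1_f32 loads.
    #[target_feature(enable = "neon")]
    pub unsafe fn load_pair(src: &[f32; 4]) -> float32x2x2_t {
        // Field .0 holds src[0..2], field .1 holds src[2..4].
        vld1_f32_x2(src.as_ptr())
    }
}
// ---------------------------------------------------------------------------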
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_mul(a, b) } -/// Subtract +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_mul(a, b) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + simd_mul(a, vdup_n_s16(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + simd_mul(a, vdupq_n_s16(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + simd_mul(a, vdup_n_s32(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + simd_mul(a, vdupq_n_s32(b)) } -/// 
Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + simd_mul(a, vdup_n_u16(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + simd_mul(a, vdupq_n_u16(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] -pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { + simd_mul(a, vdup_n_u32(b)) } -/// Subtract +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] -pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + simd_mul(a, vdupq_n_u32(b)) } -/// Subtract returning high narrow +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] -pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { + simd_mul(a, vdup_n_f32(b)) } -/// Subtract returning high narrow +/// Vector multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] -pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - let c: 
i32x4 = i32x4::new(16, 16, 16, 16);
- simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
+ simd_mul(a, vdupq_n_f32(b))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
- let c: i64x2 = i64x2::new(32, 32);
- simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
- let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
- simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+ static_assert_imm3!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
- let c: u32x4 = u32x4::new(16, 16, 16, 16);
- simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
- let c: u64x2 = u64x2::new(32, 32);
- simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ static_assert_imm3!(LANE);
+ simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
- let d: int8x8_t = vsubhn_s16(b, c);
- simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
- let d: int16x4_t = vsubhn_s32(b, c);
- simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
- let d: int32x2_t = vsubhn_s64(b, c);
- simd_shuffle4!(a, d, [0, 1, 2, 3])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
- let d: uint8x8_t = vsubhn_u16(b, c);
- simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
- let d: uint16x4_t = vsubhn_u32(b, c);
- simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Subtract returning high narrow
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
- let d: uint32x2_t = vsubhn_u64(b, c);
- simd_shuffle4!(a, d, [0, 1, 2, 3])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
+ static_assert_imm3!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")]
- fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
- }
-vhsub_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")]
- fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
- }
-vhsubq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ static_assert_imm3!(LANE);
+ simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")]
- fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
- }
-vhsub_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")]
- fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
- }
-vhsubq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")]
- fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
- }
-vhsub_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")]
- fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
- }
-vhsubq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed halving subtract
+/// Floating-point multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
-pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ static_assert_imm2!(LANE);
+ simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(smull))] +pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")] - fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")] + fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t; } -vhsub_s8_(a, b) +vmull_s8_(a, b) } -/// Signed halving subtract +/// Signed multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")] - fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")] + fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; } -vhsubq_s8_(a, b) +vmull_s16_(a, b) } -/// Signed halving subtract +/// Signed multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")] - fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")] + fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; } -vhsub_s16_(a, b) +vmull_s32_(a, b) } -/// Signed halving subtract +/// Unsigned multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] - #[cfg_attr(target_arch = 
"aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")] - fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")] + fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; } -vhsubq_s16_(a, b) +vmull_u8_(a, b) } -/// Signed halving subtract +/// Unsigned multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] - fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")] + fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; } -vhsub_s32_(a, b) +vmull_u16_(a, b) } -/// Signed halving subtract +/// Unsigned multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] - fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")] + fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; } -vhsubq_s32_(a, b) +vmull_u32_(a, b) } -/// Signed Subtract Wide +/// Polynomial multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] -pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))] +pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")] + fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + } +vmull_p8_(a, b) } -/// Signed Subtract Wide +/// Vector long multiply with scalar #[inline] 
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))]
-pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t {
- simd_sub(a, simd_cast(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
+pub unsafe fn vmullh_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+ vmull_s16(a, vdup_n_s16(b))
}

-/// Signed Subtract Wide
+/// Vector long multiply with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))]
-pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t {
- simd_sub(a, simd_cast(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
+pub unsafe fn vmulls_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+ vmull_s32(a, vdup_n_s32(b))
}

-/// Unsigned Subtract Wide
+/// Vector long multiply with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))]
-pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
- simd_sub(a, simd_cast(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
+pub unsafe fn vmullh_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
+ vmull_u16(a, vdup_n_u16(b))
}

-/// Unsigned Subtract Wide
+/// Vector long multiply with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))]
-pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t {
- simd_sub(a, simd_cast(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
+pub unsafe fn vmulls_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
+ vmull_u32(a, vdup_n_u32(b))
}

-/// Unsigned Subtract Wide
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))]
-pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
- simd_sub(a, simd_cast(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))]
-pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
- let c: int16x8_t = simd_cast(a);
- let d: int16x8_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(LANE);
+ vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Signed Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))]
-pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
- let c: int32x4_t = simd_cast(a);
- let d: int32x4_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(LANE);
+ vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Signed Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))]
-pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
- let c: int64x2_t = simd_cast(a);
- let d: int64x2_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(LANE);
+ vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Unsigned Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))]
-pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
- let c: uint16x8_t = simd_cast(a);
- let d: uint16x8_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Unsigned Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))]
-pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
- let c: uint32x4_t = simd_cast(a);
- let d: uint32x4_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
+ static_assert_imm3!(LANE);
+ vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

-/// Unsigned Subtract Long
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))]
-pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
- let c: uint64x2_t = simd_cast(a);
- let d: uint64x2_t = simd_cast(b);
- simd_sub(c, d)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+ static_assert_imm1!(LANE);
+ vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Maximum (vector)
+/// Vector long multiply by scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))]
-pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")]
- fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
- }
-vmax_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
+ static_assert_imm2!(LANE);
+ vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32]))
}

-/// Maximum (vector)
+/// Floating-point fused Multiply-Add to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
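// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the generated diff; names are hypothetical
// and an aarch64 build is assumed. The *_lane_* / *_laneq_* variants take the
// lane index as a const generic (`<const LANE: i32>`): static_assert_imm*!
// rejects an out-of-range index at compile time, and
// #[rustc_legacy_const_generics(2)] keeps the older three-argument call form
// working. The vmul_n_* and vmull_*_n variants follow the same broadcast
// pattern with a plain scalar instead of a lane.
#[cfg(target_arch = "aarch64")]
mod lane_sketch {
    use core::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn mull_by_high_lane(a: int32x2_t, b: int32x2_t) -> int64x2_t {
        // Multiply both lanes of `a` by lane 1 of `b`, widening to i64;
        // vmull_lane_s32::<2>(a, b) would be rejected by static_assert_imm1!.
        vmull_lane_s32::<1>(a, b)
    }
}
// ---------------------------------------------------------------------------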
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] - fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")] + fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; } -vmax_s16_(a, b) +vfmaq_f32_(b, c, a) } -/// Maximum (vector) +/// Floating-point fused Multiply-Add to accumulator(vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] - fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vmaxq_s16_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32(c)) } -/// Maximum (vector) +/// Floating-point fused Multiply-Add to accumulator(vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] - fn vmax_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vmax_s32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmaq_f32(a, b, vdupq_n_f32(c)) } -/// Maximum (vector) +/// Floating-point fused multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vmaxs.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] - fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vmaxq_s32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + let b: float32x2_t = simd_neg(b); + vfma_f32(a, b, c) } -/// Maximum (vector) +/// Floating-point fused multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] - fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vmax_u8_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + let b: float32x4_t = simd_neg(b); + vfmaq_f32(a, b, c) } -/// Maximum (vector) +/// Floating-point fused Multiply-subtract to accumulator(vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] - fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vmaxq_u8_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfms_f32(a, b, vdup_n_f32(c)) } -/// Maximum (vector) +/// Floating-point fused Multiply-subtract to accumulator(vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmsq_f32(a, b, vdupq_n_f32(c)) +} + +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] - #[cfg_attr(target_arch = "aarch64", 
link_name = "llvm.aarch64.neon.umax.v4i16")] - fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vmax_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_sub(a, b) } -/// Maximum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i16")] - fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vmaxq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_sub(a, b) } -/// Maximum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")] - fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vmax_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_sub(a, b) } -/// Maximum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")] - fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vmaxq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_sub(a, b) } -/// Maximum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] -pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")] - fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } 
-vmax_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_sub(a, b) } -/// Maximum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] -pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")] - fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vmaxq_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_sub(a, b) } -/// Floating-point Maximun Number (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] -pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] - fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vmaxnm_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_sub(a, b) } -/// Floating-point Maximun Number (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] -pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] - fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vmaxnmq_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] - #[cfg_attr(target_arch = 
"aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")] - fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vmin_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")] - fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vminq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")] - fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vmin_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")] - fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vminq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")] - fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vmin_s32_(a, b) 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] -pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")] - fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vminq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")] - fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vmin_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")] - fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vminq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")] - fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vmin_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(fsub))] +pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")] - fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vminq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] +pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_sub(a, b) } -/// Minimum (vector) +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")] - fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vmin_u32_(a, b) -} - -/// Minimum (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] -pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")] - fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vminq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -/// Minimum (vector) +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] -pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")] - fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vmin_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + let c: i32x4 = i32x4::new(16, 16, 16, 16); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -/// Minimum 
(vector) +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] -pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")] - fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vminq_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + let c: i64x2 = i64x2::new(32, 32); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -/// Floating-point Minimun Number (vector) +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] -pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] - fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vminnm_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -/// Floating-point Minimun Number (vector) +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] -pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] - fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vminnmq_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let c: u32x4 = u32x4::new(16, 16, 16, 16); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -/// Signed saturating doubling multiply long +/// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] -pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" 
{
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")]
- fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
- }
-vqdmull_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+ let c: u64x2 = u64x2::new(32, 32);
+ simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
-/// Signed saturating doubling multiply long
+/// Subtract returning high narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
- #[allow(improper_ctypes)]
- extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")]
- fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
- }
-vqdmull_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+ let d: int8x8_t = vsubhn_s16(b, c);
+ simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
-/// Vector saturating doubling long multiply with scalar
+/// Subtract returning high narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
- vqdmull_s16(a, vdup_n_s16(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+ let d: int16x4_t = vsubhn_s32(b, c);
+ simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
}
-/// Vector saturating doubling long multiply with scalar
+/// Subtract returning high narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
- vqdmull_s32(a, vdup_n_s32(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+ let d: int32x2_t = vsubhn_s64(b, c);
+ simd_shuffle4!(a, d, [0, 1, 2, 3])
}
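// Illustrative sketch, not part of the patch: vsubhn_* ("subtract returning
// high narrow") forms the full-width difference and keeps only the high half
// of each lane: (a - b) >> 8 for i16 -> i8, >> 16 for i32 -> i16, >> 32 for
// i64 -> i32. Test-module imports assumed; values are hypothetical.
#[simd_test(enable = "neon")]
unsafe fn sketch_vsubhn_s16() {
    let a: i16x8 = i16x8::new(0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700, 0x0800);
    let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); // high byte of each difference
    let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}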
-/// Vector saturating doubling long multiply by scalar
+/// Subtract returning high narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
- static_assert_imm2!(N);
- let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
- vqdmull_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+ let d: uint8x8_t = vsubhn_u16(b, c);
+ simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
-/// Vector saturating doubling long multiply by scalar
+/// Subtract returning high narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
- static_assert_imm1!(N);
- let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
- vqdmull_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+ let d: uint16x4_t = vsubhn_u32(b, c);
+ simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
}
-/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
- vqaddq_s32(a, vqdmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+ let d: uint32x2_t = vsubhn_u64(b, c);
+ simd_shuffle4!(a, d, [0, 1, 2, 3])
}
-/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
- vqaddq_s64(a, vqdmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")]
+ fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+ }
+vhsub_u8_(a, b)
}
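// Illustrative sketch, not part of the patch: the vsubhn_high_* variants
// narrow b - c exactly as vsubhn_* does, then return a double-length vector
// whose low half is `a` and whose high half is the narrowed difference.
// Test-module imports assumed; values are hypothetical.
#[simd_test(enable = "neon")]
unsafe fn sketch_vsubhn_high_u16() {
    let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); // becomes lanes 0..=7
    let b: u16x8 = u16x8::new(0x0800, 0x0900, 0x0A00, 0x0B00, 0x0C00, 0x0D00, 0x0E00, 0x0F00);
    let c: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r: u8x16 = transmute(vsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}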
-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
- vqaddq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")]
+ fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+ }
+vhsubq_u8_(a, b)
}
-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
- vqaddq_s64(a, vqdmull_n_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")]
+ fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+ }
+vhsub_u16_(a, b)
}
-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
- static_assert_imm2!(N);
- vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")]
+ fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+ }
+vhsubq_u16_(a, b)
}
-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
- static_assert_imm1!(N);
- vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
-}
-
-/// Signed saturating doubling multiply-subtract long
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
- vqsubq_s32(a, vqdmull_s16(b, c))
-}
-
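// Illustrative sketch, not part of the patch: the vqdmlal/vqdmlsl family being
// moved in this diff widens 2 * b * c with saturation and then saturating-adds
// it to (vqdmlal) or subtracts it from (vqdmlsl) the accumulator. Test-module
// imports assumed; values are hypothetical.
#[simd_test(enable = "neon")]
unsafe fn sketch_vqdmlal_vqdmlsl_s16() {
    let a: i32x4 = i32x4::new(1, 1, 1, 1);
    let b: i16x4 = i16x4::new(2, 2, 2, 2);
    let c: i16x4 = i16x4::new(3, 3, 3, 3);
    let r: i32x4 = transmute(vqdmlal_s16(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, i32x4::new(13, 13, 13, 13)); // 1 + 2*2*3
    let r: i32x4 = transmute(vqdmlsl_s16(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, i32x4::new(-11, -11, -11, -11)); // 1 - 2*2*3
}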
-/// Signed saturating doubling multiply-subtract long
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
- vqsubq_s64(a, vqdmull_s32(b, c))
-}
-
-/// Vector widening saturating doubling multiply subtract with scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
- vqsubq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")]
+ fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+ }
+vhsub_u32_(a, b)
}
-/// Vector widening saturating doubling multiply subtract with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
- vqsubq_s64(a, vqdmull_n_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
+pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")]
+ fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+ }
+vhsubq_u32_(a, b)
}
-/// Vector widening saturating doubling multiply subtract with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
- static_assert_imm2!(N);
- vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
+pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")]
+ fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+ }
+vhsub_s8_(a, b)
}
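// Illustrative sketch, not part of the patch: vhsub halves the difference with
// the intermediate kept one bit wider, so (a - b) >> 1 cannot overflow; the
// arithmetic shift rounds toward negative infinity. Test-module imports
// assumed; values are hypothetical.
#[simd_test(enable = "neon")]
unsafe fn sketch_vhsub_s8() {
    let a: i8x8 = i8x8::new(7, 0, -1, 127, 0, 0, 0, 0);
    let b: i8x8 = i8x8::new(2, 1, 2, -128, 0, 0, 0, 0);
    // (7-2)>>1 = 2, (0-1)>>1 = -1, (-1-2)>>1 = -2, (127-(-128))>>1 = 127
    let e: i8x8 = i8x8::new(2, -1, -2, 127, 0, 0, 0, 0);
    let r: i8x8 = transmute(vhsub_s8(transmute(a), transmute(b)));
    assert_eq!(r, e);
}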
-/// Vector widening saturating doubling multiply subtract with scalar
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
- static_assert_imm1!(N);
- vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
+pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")]
+ fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+ }
+vhsubq_s8_(a, b)
}
-/// Signed saturating doubling multiply returning high half
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
+pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
#[allow(improper_ctypes)]
extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")]
- fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")]
+ fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
}
-vqdmulh_s16_(a, b)
+vhsub_s16_(a, b)
}
-/// Signed saturating doubling multiply returning high half
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
+pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
#[allow(improper_ctypes)]
extern "C" {
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
- #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")]
- fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")]
+ fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
}
-vqdmulhq_s16_(a, b)
+vhsubq_s16_(a, b)
}
-/// Signed saturating doubling multiply returning high half
+/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_s32(a: int32x2_t, b: 
int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")] - fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] + fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqdmulh_s32_(a, b) +vhsub_s32_(a, b) } -/// Signed saturating doubling multiply returning high half +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] -pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")] - fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] + fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqdmulhq_s32_(a, b) +vhsubq_s32_(a, b) } -/// Vector saturating doubling multiply high with scalar +/// Signed Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] -pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - let b: int16x4_t = vdup_n_s16(b); - vqdmulh_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + simd_sub(a, simd_cast(b)) } -/// Vector saturating doubling multiply high with scalar +/// Signed Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] -pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - let b: int32x2_t = vdup_n_s32(b); - vqdmulh_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + simd_sub(a, simd_cast(b)) } -/// Vector saturating doubling multiply high with scalar +/// Signed Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] 
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] -pub unsafe fn vqdmulhq_nq_s16(a: int16x8_t, b: i16) -> int16x8_t { - let b: int16x8_t = vdupq_n_s16(b); - vqdmulhq_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + simd_sub(a, simd_cast(b)) } -/// Vector saturating doubling multiply high with scalar +/// Unsigned Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] -pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t { - let b: int32x4_t = vdupq_n_s32(b); - vqdmulhq_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating extract narrow +/// Unsigned Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] -pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v8i8")] - fn vqmovn_s16_(a: int16x8_t) -> int8x8_t; - } -vqmovn_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating extract narrow +/// Unsigned Subtract Wide #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] -pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v4i16")] - fn vqmovn_s32_(a: int32x4_t) -> int16x4_t; - } -vqmovn_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating extract narrow +/// Signed Subtract Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] -pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v2i32")] - fn vqmovn_s64_(a: int64x2_t) -> int32x2_t; - } -vqmovn_s64_(a) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned saturating extract narrow +/// Signed Subtract Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] -pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v8i8")] - fn vqmovn_u16_(a: uint16x8_t) -> uint8x8_t; - } -vqmovn_u16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned saturating extract narrow +/// Signed Subtract Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] -pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v4i16")] - fn vqmovn_u32_(a: uint32x4_t) -> uint16x4_t; - } -vqmovn_u32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned saturating extract narrow +/// Unsigned Subtract Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] -pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")] - fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; - } -vqmovn_u64_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) } -/// Signed saturating extract unsigned narrow +/// Unsigned Subtract Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] -pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(usubl))] +pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v8i8")] - fn vqmovun_s16_(a: int16x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")] + fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vqmovun_s16_(a) +vmax_s8_(a, b) } -/// Signed saturating extract unsigned narrow +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] -pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v4i16")] - fn vqmovun_s32_(a: int32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v16i8")] + fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vqmovun_s32_(a) +vmaxq_s8_(a, b) } -/// Signed saturating extract unsigned narrow +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] -pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v2i32")] - fn vqmovun_s64_(a: int64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] + fn vmax_s16_(a: int16x4_t, b: 
int16x4_t) -> int16x4_t; } -vqmovun_s64_(a) +vmax_s16_(a, b) } -/// Signed saturating rounding doubling multiply returning high half +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i16")] - fn vqrdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] + fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqrdmulh_s16_(a, b) +vmaxq_s16_(a, b) } -/// Signed saturating rounding doubling multiply returning high half +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v8i16")] - fn vqrdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] + fn vmax_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqrdmulhq_s16_(a, b) +vmax_s32_(a, b) } -/// Signed saturating rounding doubling multiply returning high half +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v2i32")] - fn vqrdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] + fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqrdmulh_s32_(a, b) +vmaxq_s32_(a, b) } -/// Signed saturating rounding doubling multiply returning high 
half +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i32")] - fn vqrdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] + fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } -vqrdmulhq_s32_(a, b) +vmax_u8_(a, b) } -/// Vector saturating rounding doubling multiply high with scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - vqrdmulh_s16(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] + fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vmaxq_u8_(a, b) } -/// Vector saturating rounding doubling multiply high with scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - vqrdmulhq_s16(a, vdupq_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i16")] + fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vmax_u16_(a, b) } -/// Vector saturating rounding doubling multiply high with scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - vqrdmulh_s32(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u16(a: uint16x8_t, b: 
uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i16")] + fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vmaxq_u16_(a, b) } -/// Vector saturating rounding doubling multiply high with scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - vqrdmulhq_s32(a, vdupq_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")] + fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vmax_u32_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_imm2!(LANE); - let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")] + fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vmaxq_u32_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_imm3!(LANE); - let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] +pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")] + fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmax_f32_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Maximum (vector) 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_imm2!(LANE); - let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] +pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")] + fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vmaxq_f32_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_imm3!(LANE); - let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s16(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] + fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmaxnm_f32_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_imm1!(LANE); - let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] + fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vmaxnmq_f32_(a, b) } 
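The `vmax*`/`vmaxnm*` split above is worth pinning down: `fmax` propagates NaN lane-wise, while `fmaxnm` implements IEEE 754 maxNum and prefers the numeric operand. A minimal usage sketch, illustrative only (assumes an aarch64 target and the usual `core::arch::aarch64` re-exports; `maxnm_vs_max` is a hypothetical helper, not part of this patch):

#[cfg(target_arch = "aarch64")]
unsafe fn maxnm_vs_max() {
    use core::arch::aarch64::*;
    let nan = vdup_n_f32(f32::NAN);
    let one = vdup_n_f32(1.0);
    // vmax_f32 lowers to fmax: a NaN input poisons the lane.
    assert!(vget_lane_f32::<0>(vmax_f32(nan, one)).is_nan());
    // vmaxnm_f32 lowers to fmaxnm: the numeric operand wins.
    assert_eq!(vget_lane_f32::<0>(vmaxnm_f32(nan, one)), 1.0);
}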
-/// Vector rounding saturating doubling multiply high by scalar +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_imm2!(LANE); - let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")] + fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vmin_s8_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_imm1!(LANE); - let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")] + fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vminq_s8_(a, b) } -/// Vector rounding saturating doubling multiply high by scalar +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_imm2!(LANE); - let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")] + fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vmin_s16_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] 
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - vqadd_s16(a, vqrdmulh_s16(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")] + fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vminq_s16_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - vqaddq_s16(a, vqrdmulhq_s16(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")] + fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vmin_s32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - vqadd_s32(a, vqrdmulh_s32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")] + fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vminq_s32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - vqaddq_s32(a, vqrdmulhq_s32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")] + fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vmin_u8_(a, b) } 
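The signed/unsigned `vmin`/`vmax` pairs compose naturally into a lane-wise clamp, which is a common way these instructions end up used. A sketch under the same assumptions as above (hypothetical `clamp_i16x4` helper, aarch64 target, not part of this patch):

#[cfg(target_arch = "aarch64")]
mod clamp_demo {
    use core::arch::aarch64::*;

    /// Clamp every lane of `x` into `[lo, hi]` with one smin and one smax.
    pub unsafe fn clamp_i16x4(x: int16x4_t, lo: i16, hi: i16) -> int16x4_t {
        vmax_s16(vmin_s16(x, vdup_n_s16(hi)), vdup_n_s16(lo))
    }
}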
-/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_imm2!(LANE); - vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")] + fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vminq_u8_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_imm3!(LANE); - vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")] + fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vmin_u16_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_imm2!(LANE); - vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")] + fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vminq_u16_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 
1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_imm3!(LANE); - vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")] + fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vmin_u32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_imm1!(LANE); - vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")] + fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vminq_u32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_imm2!(LANE); - vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")] + fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmin_f32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_imm1!(LANE); - vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +pub unsafe fn 
vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")] + fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminq_f32_(a, b) } -/// Signed saturating rounding doubling multiply accumulate returning high half +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_imm2!(LANE); - vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] + fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vminnm_f32_(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - vqsub_s16(a, vqrdmulh_s16(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] + fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminnmq_f32_(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Signed saturating doubling multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - vqsubq_s16(a, vqrdmulhq_s16(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")] + fn vqdmull_s16_(a: 
int16x4_t, b: int16x4_t) -> int32x4_t; + } +vqdmull_s16_(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Signed saturating doubling multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - vqsub_s32(a, vqrdmulh_s32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")] + fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } +vqdmull_s32_(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector saturating doubling long multiply with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] -pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - vqsubq_s32(a, vqrdmulhq_s32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vqdmull_s16(a, vdup_n_s16(b)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector saturating doubling long multiply with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_imm2!(LANE); - vqsub_s16(a, vqrdmulh_lane_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vqdmull_s32(a, vdup_n_s32(b)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector saturating doubling long multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_imm3!(LANE); - vqsub_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + 
let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); + vqdmull_s16(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector saturating doubling long multiply by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_imm2!(LANE); - vqsubq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); + vqdmull_s32(a, b) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Signed saturating doubling multiply-add long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_imm3!(LANE); - vqsubq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))] +pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_s16(b, c)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Signed saturating doubling multiply-add long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_imm1!(LANE); - vqsub_s32(a, vqrdmulh_lane_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))] +pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqaddq_s64(a, vqdmull_s32(b, c)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_imm2!(LANE); - vqsub_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(sqdmlal))] +pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_n_s16(b, c)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_imm1!(LANE); - vqsubq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))] +pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) } -/// Signed saturating rounding doubling multiply subtract returning high half +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))] #[rustc_legacy_const_generics(3)] -pub unsafe fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_imm2!(LANE); - vqsubq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c)) +pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c)) } -/// Signed saturating rounding shift left +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")] - fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqrshl_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c)) } -/// Signed saturating rounding shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqrshifts.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")] - fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqrshlq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqsubq_s32(a, vqdmull_s16(b, c)) } -/// Signed saturating rounding shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")] - fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqrshl_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_s32(b, c)) } -/// Signed saturating rounding shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")] - fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqrshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) } -/// Signed saturating rounding shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")] - fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqrshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_n_s32(b, c)) } -/// Signed saturating rounding shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")] - fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqrshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) } -/// Signed saturating rounding shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")] - fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vqrshl_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) } -/// Signed saturating rounding shift left +/// Signed saturating doubling multiply returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")] - fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")] + fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqrshlq_s64_(a, b) +vqdmulh_s16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed saturating doubling multiply returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) 
-> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")] - fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")] + fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqrshl_u8_(a, b) +vqdmulhq_s16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed saturating doubling multiply returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")] - fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")] + fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqrshlq_u8_(a, b) +vqdmulh_s32_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed saturating doubling multiply returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")] - fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")] + fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqrshl_u16_(a, b) +vqdmulhq_s32_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Vector saturating doubling multiply high with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - 
#[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")] - fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } -vqrshlq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + let b: int16x4_t = vdup_n_s16(b); + vqdmulh_s16(a, b) } -/// Unsigned signed saturating rounding shift left +/// Vector saturating doubling multiply high with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")] - fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vqrshl_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + let b: int32x2_t = vdup_n_s32(b); + vqdmulh_s32(a, b) } -/// Unsigned signed saturating rounding shift left +/// Vector saturating doubling multiply high with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")] - fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } -vqrshlq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulhq_nq_s16(a: int16x8_t, b: i16) -> int16x8_t { + let b: int16x8_t = vdupq_n_s16(b); + vqdmulhq_s16(a, b) } -/// Unsigned signed saturating rounding shift left +/// Vector saturating doubling multiply high with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")] - fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } -vqrshl_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t { + let b: int32x4_t = vdupq_n_s32(b); + vqdmulhq_s32(a, b) } -/// Unsigned signed saturating 
rounding shift left +/// Signed saturating extract narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] +pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")] - fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v8i8")] + fn vqmovn_s16_(a: int16x8_t) -> int8x8_t; } -vqrshlq_u64_(a, b) +vqmovn_s16_(a) } -/// Signed saturating rounded shift right narrow +/// Signed saturating extract narrow #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] +pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v4i16")] + fn vqmovn_s32_(a: int32x4_t) -> int16x4_t; } -vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +vqmovn_s32_(a) } -/// Signed saturating rounded shift right narrow +/// Signed saturating extract narrow #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] +pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v2i32")] + fn vqmovn_s64_(a: int64x2_t) -> int32x2_t; } -vqrshrn_n_s16_(a, N) +vqmovn_s64_(a) } -/// Signed saturating rounded shift right narrow +/// Unsigned saturating extract narrow #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] - fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v8i8")] + fn vqmovn_u16_(a: uint16x8_t) -> uint8x8_t; } -vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +vqmovn_u16_(a) } -/// Signed saturating rounded shift right narrow +/// Unsigned saturating extract narrow #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")] - fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v4i16")] + fn vqmovn_u32_(a: uint32x4_t) -> uint16x4_t; } -vqrshrn_n_s32_(a, N) +vqmovn_u32_(a) } -/// Signed saturating rounded shift right narrow +/// Unsigned saturating extract narrow #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")] + fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; } -vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +vqmovn_u64_(a) } -/// Signed saturating rounded shift right narrow +/// Signed saturating extract unsigned narrow #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N : 
i32 where N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v8i8")] + fn vqmovun_s16_(a: int16x8_t) -> uint8x8_t; } -vqrshrn_n_s64_(a, N) +vqmovun_s16_(a) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating extract unsigned narrow #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v4i16")] + fn vqmovun_s32_(a: int32x4_t) -> uint16x4_t; } -vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16)) +vqmovun_s32_(a) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating extract unsigned narrow #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v2i32")] + fn vqmovun_s64_(a: int64x2_t) -> uint32x2_t; } -vqrshrn_n_u16_(a, N) +vqmovun_s64_(a) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating rounding doubling multiply returning high half #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
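The vqmovn/vqmovun intrinsics added above narrow each lane to half its width with saturation rather than truncation: vqmovn clamps to the signed range of the narrower type, while vqmovun clamps a signed input into the unsigned range. A minimal scalar sketch of one lane of each (the helper names are illustrative only, not code from this patch):

    // One lane of vqmovn_s16 (SQXTN): saturate an i16 into the i8 range.
    fn vqmovn_lane_s16(a: i16) -> i8 {
        a.clamp(i8::MIN as i16, i8::MAX as i16) as i8
    }

    // One lane of vqmovun_s16 (SQXTUN): negative inputs clamp to zero.
    fn vqmovun_lane_s16(a: i16) -> u8 {
        a.clamp(0, u8::MAX as i16) as u8
    }

    fn main() {
        assert_eq!(vqmovn_lane_s16(300), i8::MAX); // 300 saturates to 127
        assert_eq!(vqmovun_lane_s16(-5), 0);       // negative clamps to 0
    }
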
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i16")] + fn vqrdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) +vqrdmulh_s16_(a, b) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating rounding doubling multiply returning high half #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v8i16")] + fn vqrdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqrshrn_n_u32_(a, N) +vqrdmulhq_s16_(a, b) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating rounding doubling multiply returning high half #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v2i32")] + fn vqrdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64)) +vqrdmulh_s32_(a, b) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed saturating rounding doubling multiply returning high half #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - 
static_assert!(N : i32 where N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i32")] + fn vqrdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqrshrn_n_u64_(a, N) +vqrdmulhq_s32_(a, b) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector saturating rounding doubling multiply high with scalar #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } -vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + vqrdmulh_s16(a, vdup_n_s16(b)) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector saturating rounding doubling multiply high with scalar #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; - } -vqrshrun_n_s16_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + vqrdmulhq_s16(a, vdupq_n_s16(b)) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector saturating rounding doubling multiply high with scalar #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; - } -vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + vqrdmulh_s32(a, vdup_n_s32(b)) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector saturating rounding doubling multiply high with scalar #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; - } -vqrshrun_n_s32_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + vqrdmulhq_s32(a, vdupq_n_s32(b)) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector rounding saturating doubling multiply high by scalar #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; - } -vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulh_s16(a, b) } -/// Signed saturating rounded shift right unsigned narrow +/// Vector rounding saturating doubling multiply high by scalar #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; - } -vqrshrun_n_s64_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_imm3!(LANE); + let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, 
LANE as u32]); + vqrdmulh_s16(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] - fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqshl_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_imm2!(LANE); + let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s16(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] - fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqshlq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s16(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] - fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqshl_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); + vqrdmulh_s32(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply 
high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] - fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_imm2!(LANE); + let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); + vqrdmulh_s32(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] - fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_imm1!(LANE); + let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s32(a, b) } -/// Signed saturating shift left +/// Vector rounding saturating doubling multiply high by scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] - fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulhq_s32(a, b) } -/// Signed saturating shift left +/// Signed saturating rounding doubling multiply accumulate returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] 
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] - fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vqshl_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + vqadd_s16(a, vqrdmulh_s16(b, c)) } -/// Signed saturating shift left +/// Signed saturating rounding doubling multiply accumulate returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] - fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vqshlq_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + vqaddq_s16(a, vqrdmulhq_s16(b, c)) } -/// Unsigned saturating shift left +/// Signed saturating rounding doubling multiply accumulate returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] - fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } -vqshl_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + vqadd_s32(a, vqrdmulh_s32(b, c)) } -/// Unsigned saturating shift left +/// Signed saturating rounding doubling multiply accumulate returning high half #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] - fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } -vqshlq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: 
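Each vqrdmulh lane above computes a saturating rounding doubling multiply and keeps the high half; the vqrdmlah/vqrdmlsh definitions in this hunk then compose it with a saturating add or subtract, exactly as their bodies show. A scalar sketch of one i16 lane, assuming the usual Q15 fixed-point formulation (2*a*b + 2^15) >> 16 (illustrative helper, not patch code):

    // One lane of vqrdmulh_s16 (SQRDMULH): doubling product plus rounding
    // constant, then the high 16 bits, saturated. Only i16::MIN * i16::MIN
    // can leave the i16 range.
    fn sqrdmulh_i16(a: i16, b: i16) -> i16 {
        let r = (2 * (a as i64) * (b as i64) + (1 << 15)) >> 16;
        r.clamp(i16::MIN as i64, i16::MAX as i64) as i16
    }

    fn main() {
        assert_eq!(sqrdmulh_i16(16384, 16384), 8192);            // 0.5 * 0.5 = 0.25 in Q15
        assert_eq!(sqrdmulh_i16(i16::MIN, i16::MIN), i16::MAX);  // the one saturating case
    }
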
-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")]
-        fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-vqshlq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    vqaddq_s32(a, vqrdmulhq_s32(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")]
-        fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-vqshl_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")]
-        fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-vqshlq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")]
-        fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-vqshl_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")]
-        fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-vqshlq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")]
-        fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-vqshl_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
-pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")]
-        fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-vqshlq_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_imm3!(N);
-    vqshl_s8(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_imm3!(N);
-    vqshlq_s8(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_imm4!(N);
-    vqshl_s16(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    vqsub_s16(a, vqrdmulh_s16(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_imm4!(N);
-    vqshlq_s16(a, vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    vqsubq_s16(a, vqrdmulhq_s16(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_imm5!(N);
-    vqshl_s32(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    vqsub_s32(a, vqrdmulh_s32(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_imm5!(N);
-    vqshlq_s32(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqrdmulhq_s32(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_imm6!(N);
-    vqshl_s64(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vqsub_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_imm6!(N);
-    vqshlq_s64(a, vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vqsub_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_imm3!(N);
-    vqshl_u8(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vqsubq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_imm3!(N);
-    vqshlq_u8(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vqsubq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_imm4!(N);
-    vqshl_u16(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vqsub_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_imm4!(N);
-    vqshlq_u16(a, vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vqsub_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_imm5!(N);
-    vqshl_u32(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vqsubq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_imm5!(N);
-    vqshlq_u32(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vqsubq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_imm6!(N);
-    vqshl_u64(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
+        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqrshl_s8_(a, b)
 }

-/// Unsigned saturating shift left
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_imm6!(N);
-    vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap()))
-}
-
-/// Signed saturating shift right narrow
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
+        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+vqrshlq_s8_(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
+        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vqshrn_n_s16_(a, N)
+vqrshl_s16_(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
-        fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")]
+        fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+vqrshlq_s16_(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")]
-        fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")]
+        fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-vqshrn_n_s32_(a, N)
+vqrshl_s32_(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
-        fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")]
+        fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+vqrshlq_s32_(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")]
-        fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")]
+        fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-vqshrn_n_s64_(a, N)
+vqrshl_s64_(a, b)
 }

-/// Unsigned saturating shift right narrow
+/// Signed saturating rounding shift left
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
-        fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")]
+        fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
+vqrshlq_s64_(a, b)
 }

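The vqrshl family above shifts every lane of the first operand by a signed, per-lane count taken from the second: a non-negative count shifts left and saturates, while a negative count shifts right with rounding. A scalar sketch of one signed i8 lane under that reading of the SQRSHL semantics (illustrative helper, not patch code):

    // One lane of vqrshl_s8 (SQRSHL).
    fn sqrshl_i8(a: i8, b: i8) -> i8 {
        let x = a as i32;
        if b >= 0 {
            // Left shift with saturation; a count of 8+ saturates any non-zero lane.
            let n = (b as u32).min(8);
            (x << n).clamp(i8::MIN as i32, i8::MAX as i32) as i8
        } else {
            // Rounding right shift: add 2^(n-1) before shifting; counts past 8 give 0.
            let n = ((-(b as i32)) as u32).min(8);
            ((x + (1 << (n - 1))) >> n) as i8
        }
    }

    fn main() {
        assert_eq!(sqrshl_i8(100, 1), i8::MAX); // 200 saturates to 127
        assert_eq!(sqrshl_i8(5, -1), 3);        // (5 + 1) >> 1: rounded right shift
    }
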
-/// Unsigned saturating shift right narrow +/// Unsigned signed saturating rounding shift left #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")] - fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")] + fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } -vqshrn_n_u16_(a, N) +vqrshl_u8_(a, b) } -/// Unsigned saturating shift right narrow +/// Unsigned signed saturating rounding shift left #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] - fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")] + fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; } -vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) +vqrshlq_u8_(a, b) } -/// Unsigned saturating shift right narrow +/// Unsigned signed saturating rounding shift left #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")] - fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")] + fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } -vqshrn_n_u32_(a, N) +vqrshl_u16_(a, b) } -/// Unsigned saturating shift right narrow +/// Unsigned signed saturating rounding shift left #[inline] 
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
-        fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")]
+        fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
    }
-vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+vqrshlq_u16_(a, b)
}

-/// Unsigned saturating shift right narrow
+/// Unsigned saturating rounding shift left
#[inline]
-#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")]
-        fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")]
+        fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
    }
-vqshrn_n_u64_(a, N)
+vqrshl_u32_(a, b)
}

-/// Signed saturating shift right unsigned narrow
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")]
+        fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+vqrshlq_u32_(a, b)
+}
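
Note that even the unsigned `vqrshl` variants take a signed shift vector (`b: int8x8_t`, and so on), so a single intrinsic covers shifts in either direction. A minimal sketch under the same assumed test harness (illustrative, not part of the patch):

    #[simd_test(enable = "neon")]
    unsafe fn test_vqrshl_u8_sketch() {
        // 0x80 << 1 saturates to 0xFF; 3 >> 1 rounds to 2 ((3 + 1) >> 1).
        let a: uint8x8_t = transmute([0x80u8, 3, 0, 0, 0, 0, 0, 0]);
        let b: int8x8_t = transmute([1i8, -1, 0, 0, 0, 0, 0, 0]);
        let e: [u8; 8] = [0xFF, 2, 0, 0, 0, 0, 0, 0];
        let r: [u8; 8] = transmute(vqrshl_u8(a, b));
        assert_eq!(r, e);
    }
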
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")]
+        fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+vqrshl_u64_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")]
+        fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+vqrshlq_u64_(a, b)
+}
+
+/// Signed saturating rounded shift right narrow
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t {
+pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
-        fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
    }
-vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
}

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating rounded shift right narrow
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t {
+pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")]
-        fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")]
+        fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
    }
-vqshrun_n_s16_(a, N)
+vqrshrn_n_s16_(a, N)
}

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating rounded shift right narrow
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t {
+pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
-        fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
    }
-vqshrun_n_s32_(a, int32x4_t(-N as
i32, -N as i32, -N as i32, -N as i32)) +vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) } -/// Signed saturating shift right unsigned narrow +/// Signed saturating rounded shift right narrow #[inline] #[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { +pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")] - fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")] + fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; } -vqshrun_n_s32_(a, N) +vqrshrn_n_s32_(a, N) } -/// Signed saturating shift right unsigned narrow +/// Signed saturating rounded shift right narrow #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { +pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] - fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] + fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; } -vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) } -/// Signed saturating shift right unsigned narrow +/// Signed saturating rounded shift right narrow #[inline] #[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { +pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")] - fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")] + fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; } -vqshrun_n_s64_(a, N) +vqrshrn_n_s64_(a, N) } -/// Reciprocal square-root estimate. 
+/// Unsigned saturating rounded shift right narrow
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
-pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
-        fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
    }
-vrsqrte_f32_(a)
+vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
}

-/// Reciprocal square-root estimate.
+/// Unsigned saturating rounded shift right narrow
#[inline]
+#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
-pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")]
-        fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
    }
-vrsqrteq_f32_(a)
+vqrshrn_n_u16_(a, N)
}

-/// Reciprocal estimate.
+/// Unsigned saturating rounded shift right narrow
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
-pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")]
-        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
    }
-vrecpe_f32_(a)
+vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
}
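
The `_n` narrowing forms take the shift amount as a const parameter: the Arm path lowers it to a negated, broadcast shift vector for the `vqrshiftn*` LLVM intrinsics, while the AArch64 path passes it straight through as an immediate. A minimal usage sketch (illustrative, same assumed harness):

    #[simd_test(enable = "neon")]
    unsafe fn test_vqrshrn_n_u16_sketch() {
        // Each u16 lane is shifted right by 2 with rounding, then narrowed to u8
        // with saturation: 7 >> 2 rounds to 2, and 0xFFFF >> 2 saturates to 0xFF.
        let a: uint16x8_t = transmute([7u16, 0xFFFF, 4, 0, 0, 0, 0, 0]);
        let e: [u8; 8] = [2, 0xFF, 1, 0, 0, 0, 0, 0];
        let r: [u8; 8] = transmute(vqrshrn_n_u16::<2>(a));
        assert_eq!(r, e);
    }
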
-/// Reciprocal estimate.
+/// Unsigned saturating rounded shift right narrow
#[inline]
+#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
-pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")]
-        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
    }
-vrecpeq_f32_(a)
+vqrshrn_n_u32_(a, N)
}

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
}

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
#[inline]
+#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, N)
}

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16(a: int16x8_t)
-> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] + fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } +vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")] + fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; + } +vqrshrun_n_s16_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { - transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] + fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } +vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")] + fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; + } +vqrshrun_n_s32_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { 
- transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } +vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] + fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + } +vqrshrun_n_s64_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] + fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqshl_s8_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] + fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqshlq_s8_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn 
vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] + fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqshl_s16_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] + fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqshlq_s16_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] + fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqshl_s32_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] + fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> 
int32x4_t; + } +vqshlq_s32_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] + fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqshl_s64_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] + fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqshlq_s64_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] + fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vqshl_u8_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] + fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> 
uint8x16_t; + } +vqshlq_u8_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] + fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vqshl_u16_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] + fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vqshlq_u16_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] + fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vqshl_u32_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] + fn vqshlq_u32_(a: 
uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vqshlq_u32_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")] + fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vqshl_u64_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")] + fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vqshlq_u64_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + vqshl_s8(a, vdup_n_s8(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + vqshlq_s8(a, vdupq_n_s8(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + vqshl_s16(a, vdup_n_s16(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + vqshlq_s16(a, vdupq_n_s16(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_imm5!(N); + vqshl_s32(a, vdup_n_s32(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_imm5!(N); + vqshlq_s32(a, vdupq_n_s32(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn 
vqshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_imm6!(N); + vqshl_s64(a, vdup_n_s64(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_imm6!(N); + vqshlq_s64(a, vdupq_n_s64(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + vqshl_u8(a, vdup_n_s8(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + vqshlq_u8(a, vdupq_n_s8(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + vqshl_u16(a, vdup_n_s16(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] 
+#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + vqshlq_u16(a, vdupq_n_s16(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + vqshl_u32(a, vdup_n_s32(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + vqshlq_u32(a, vdupq_n_s32(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + vqshl_u64(a, vdup_n_s64(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap())) } -/// Vector reinterpret cast operation +/// Signed saturating shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { - transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) } -/// Vector reinterpret cast operation +/// Signed saturating shift right narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")] + fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + } +vqshrn_n_s16_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { - transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } +vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) } -/// Vector reinterpret cast operation +/// Signed saturating shift right narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")] + fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + } +vqshrn_n_s32_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_p16(a: 
poly16x4_t) -> uint8x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+vqshrn_n_s64_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+vqshrn_n_u16_(a, N)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+vqshrn_n_u32_(a, N)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqshrn_n_u64_(a, N)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+vqshrun_n_s16_(a, N)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+vqshrun_n_s32_(a, N)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+vqshrun_n_s64_(a, N)
+}
+
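(Usage sketch for the const-generic shift intrinsics above; illustrative only, not part of the patch. It assumes a nightly toolchain that re-exports these through core::arch::aarch64. The shift amount is a const generic, range-checked at compile time by the static_assert!, and #[rustc_legacy_const_generics(1)] additionally accepts the older two-argument call form, e.g. vqshrun_n_s16(a, 2).)

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_narrow(a: core::arch::aarch64::int16x8_t) -> core::arch::aarch64::uint8x8_t {
    use core::arch::aarch64::vqshrun_n_s16;
    // Shift each i16 lane right by 2, then saturate into u8: negative lanes
    // clamp to 0 and results above 255 clamp to 255 (SQSHRUN semantics).
    vqshrun_n_s16::<2>(a)
}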
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
+pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
+        fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrsqrte_f32_(a)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
+pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")]
+        fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrsqrteq_f32_(a)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")]
+        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrecpe_f32_(a)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")]
+        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrecpeq_f32_(a)
 }
 
 /// Vector reinterpret cast operation
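(Refinement sketch, illustrative only and not part of the patch: vrecpe_f32/vrsqrte_f32 return only a coarse per-lane estimate, which callers conventionally sharpen with the Newton-Raphson step intrinsics; vrecps_f32 and vmul_f32 below are the existing NEON step/multiply intrinsics, assumed to be available from core::arch::aarch64.)

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_recip(d: core::arch::aarch64::float32x2_t) -> core::arch::aarch64::float32x2_t {
    use core::arch::aarch64::{vmul_f32, vrecpe_f32, vrecps_f32};
    let mut x = vrecpe_f32(d);          // coarse estimate of 1.0 / d
    x = vmul_f32(vrecps_f32(d, x), x);  // Newton-Raphson step: x * (2 - d*x)
    x = vmul_f32(vrecps_f32(d, x), x);  // second step, near full f32 precision
    x
}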
@@ -10678,7 +10988,7 @@ pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     transmute(a)
 }
 
@@ -10688,7 +10998,7 @@ pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     transmute(a)
 }
 
@@ -10698,7 +11008,7 @@ pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     transmute(a)
 }
 
@@ -10708,7 +11018,7 @@ pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     transmute(a)
 }
 
@@ -10718,7 +11028,7 @@ pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     transmute(a)
 }
 
@@ -10728,7 +11038,7 @@ pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     transmute(a)
 }
 
@@ -10738,7 +11048,7 @@ pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -10748,7 +11058,7 @@ pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -10758,7 +11068,7 @@ pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -10768,7 +11078,7 @@ pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -10778,7 +11088,7 @@ pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { +pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { transmute(a) } @@ -10788,7 +11098,7 @@ pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { +pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { transmute(a) } @@ -10798,7 +11108,7 @@ pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { +pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { transmute(a) } @@ -10808,7 +11118,7 @@ pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { +pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { transmute(a) } @@ -10818,7 +11128,7 @@ pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { +pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { transmute(a) } @@ -10828,7 +11138,7 @@ pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { +pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { transmute(a) } @@ -10838,7 +11148,7 @@ pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { +pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { transmute(a) } @@ -10848,7 +11158,7 @@ pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { +pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { transmute(a) } @@ -10858,7 +11168,7 @@ pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_s8(a: 
int8x8_t) -> int16x4_t { +pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { transmute(a) } @@ -10868,7 +11178,7 @@ pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { +pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { transmute(a) } @@ -10878,7 +11188,7 @@ pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { +pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { transmute(a) } @@ -10888,7 +11198,7 @@ pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { +pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { transmute(a) } @@ -10898,7 +11208,7 @@ pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { +pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { transmute(a) } @@ -10908,7 +11218,7 @@ pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { +pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { transmute(a) } @@ -10918,7 +11228,7 @@ pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { +pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { transmute(a) } @@ -10928,7 +11238,7 @@ pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { +pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { transmute(a) } @@ -10938,7 +11248,7 @@ pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { +pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { transmute(a) } @@ -10948,7 +11258,7 @@ 
pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { +pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { transmute(a) } @@ -10958,7 +11268,7 @@ pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { +pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { transmute(a) } @@ -10968,7 +11278,7 @@ pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { +pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { transmute(a) } @@ -10978,7 +11288,7 @@ pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { +pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { transmute(a) } @@ -10988,7 +11298,7 @@ pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { +pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { transmute(a) } @@ -10998,7 +11308,7 @@ pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { +pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { transmute(a) } @@ -11008,7 +11318,7 @@ pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { +pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { transmute(a) } @@ -11018,7 +11328,7 @@ pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { +pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { transmute(a) } @@ -11028,7 +11338,7 @@ pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { +pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { transmute(a) } @@ -11038,7 +11348,7 @@ pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { +pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { transmute(a) } @@ -11048,7 +11358,7 @@ pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { +pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { transmute(a) } @@ -11058,7 +11368,7 @@ pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { +pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { transmute(a) } @@ -11068,7 +11378,7 @@ pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { transmute(a) } @@ -11078,7 +11388,7 @@ pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { transmute(a) } @@ -11088,7 +11398,7 @@ pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { transmute(a) } @@ -11098,7 +11408,7 @@ pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { transmute(a) } @@ -11108,7 +11418,7 @@ pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { transmute(a) } @@ -11118,7 +11428,7 @@ pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { transmute(a) } @@ -11128,7 +11438,7 @@ pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { transmute(a) } @@ -11138,7 +11448,7 @@ pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { +pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { transmute(a) } @@ -11148,7 +11458,7 @@ pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { +pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { transmute(a) } @@ -11158,7 +11468,7 @@ pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { +pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { transmute(a) } @@ -11168,7 +11478,7 @@ pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { +pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { transmute(a) } @@ -11178,7 +11488,7 @@ pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { +pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { transmute(a) } @@ -11188,7 +11498,7 @@ pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { +pub 
unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { transmute(a) } @@ -11198,7 +11508,7 @@ pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { +pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { transmute(a) } @@ -11208,7 +11518,7 @@ pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { +pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { transmute(a) } @@ -11218,7 +11528,7 @@ pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { +pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { transmute(a) } @@ -11228,7 +11538,7 @@ pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { +pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { transmute(a) } @@ -11238,7 +11548,7 @@ pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { +pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { transmute(a) } @@ -11248,7 +11558,7 @@ pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { +pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { transmute(a) } @@ -11258,7 +11568,7 @@ pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { +pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { transmute(a) } @@ -11268,7 +11578,7 @@ pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { +pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { transmute(a) } @@ -11278,7 +11588,7 @@ pub unsafe fn 
vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { +pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { transmute(a) } @@ -11288,7 +11598,7 @@ pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { +pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { transmute(a) } @@ -11298,7 +11608,7 @@ pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { +pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { transmute(a) } @@ -11308,7 +11618,7 @@ pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { +pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { transmute(a) } @@ -11318,7 +11628,7 @@ pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { +pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { transmute(a) } @@ -11328,7 +11638,7 @@ pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { +pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { transmute(a) } @@ -11338,7 +11648,7 @@ pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { +pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { transmute(a) } @@ -11348,7 +11658,7 @@ pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { transmute(a) } @@ -11358,7 +11668,7 @@ pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { transmute(a) } @@ -11368,7 +11678,7 @@ pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { transmute(a) } @@ -11378,7 +11688,7 @@ pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { +pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { transmute(a) } @@ -11388,7 +11698,7 @@ pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { transmute(a) } @@ -11398,7 +11708,7 @@ pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { transmute(a) } @@ -11408,7 +11718,7 @@ pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { +pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { transmute(a) } @@ -11418,7 +11728,7 @@ pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { +pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { transmute(a) } @@ -11428,7 +11738,7 @@ pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { +pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { transmute(a) } @@ -11438,7 +11748,7 @@ pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(str))] -pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { +pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { transmute(a) } @@ -11448,7 +11758,7 @@ pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { +pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { transmute(a) } @@ -11458,7 +11768,7 @@ pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { +pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { transmute(a) } @@ -11468,7 +11778,7 @@ pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { +pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { transmute(a) } @@ -11478,7 +11788,7 @@ pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { +pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { transmute(a) } @@ -11488,7 +11798,7 @@ pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { +pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { transmute(a) } @@ -11498,7 +11808,7 @@ pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { +pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { transmute(a) } @@ -11508,7 +11818,7 @@ pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { +pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { transmute(a) } @@ -11518,7 +11828,7 @@ pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { +pub unsafe fn 
vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { transmute(a) } @@ -11528,7 +11838,7 @@ pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { +pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { transmute(a) } @@ -11538,7 +11848,7 @@ pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { +pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { transmute(a) } @@ -11548,7 +11858,7 @@ pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { +pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { transmute(a) } @@ -11558,7 +11868,7 @@ pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { +pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { transmute(a) } @@ -11568,7 +11878,7 @@ pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { +pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { transmute(a) } @@ -11578,7 +11888,7 @@ pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { +pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { transmute(a) } @@ -11588,7 +11898,7 @@ pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { +pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { transmute(a) } @@ -11598,7 +11908,7 @@ pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { +pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { transmute(a) } @@ -11608,7 +11918,7 @@ pub unsafe fn 
vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { +pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { transmute(a) } @@ -11618,7 +11928,7 @@ pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { transmute(a) } @@ -11628,7 +11938,7 @@ pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { transmute(a) } @@ -11638,7 +11948,7 @@ pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { transmute(a) } @@ -11648,7 +11958,7 @@ pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { transmute(a) } @@ -11658,7 +11968,7 @@ pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { transmute(a) } @@ -11668,7 +11978,7 @@ pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { transmute(a) } @@ -11678,7 +11988,7 @@ pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { transmute(a) } @@ -11688,7 +11998,7 @@ pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { transmute(a) } @@ -11698,7 +12008,7 @@ pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { transmute(a) } @@ -11708,7 +12018,7 @@ pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { +pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { transmute(a) } @@ -11718,7 +12028,7 @@ pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { +pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { transmute(a) } @@ -11728,7 +12038,7 @@ pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { +pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { transmute(a) } @@ -11738,7 +12048,7 @@ pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { +pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { transmute(a) } @@ -11748,7 +12058,7 @@ pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { +pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { transmute(a) } @@ -11758,7 +12068,7 @@ pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { transmute(a) } @@ -11768,7 +12078,7 @@ pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { +pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { transmute(a) } @@ -11778,7 +12088,7 @@ pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { +pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { transmute(a) } @@ -11788,7 +12098,7 @@ pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { +pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { transmute(a) } @@ -11798,7 +12108,7 @@ pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { transmute(a) } @@ -11808,7 +12118,7 @@ pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { +pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { transmute(a) } @@ -11818,7 +12128,7 @@ pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { +pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { transmute(a) } @@ -11828,7 +12138,7 @@ pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { +pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { transmute(a) } @@ -11838,7 +12148,7 @@ pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { +pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { transmute(a) } @@ -11848,7 +12158,7 @@ pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { +pub unsafe fn 
vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { transmute(a) } @@ -11858,7 +12168,7 @@ pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { transmute(a) } @@ -11868,7 +12178,7 @@ pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { transmute(a) } @@ -11878,7 +12188,7 @@ pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { transmute(a) } @@ -11888,7 +12198,7 @@ pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { transmute(a) } @@ -11898,7 +12208,7 @@ pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { transmute(a) } @@ -11908,7 +12218,7 @@ pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { transmute(a) } @@ -11918,7 +12228,7 @@ pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { transmute(a) } @@ -11928,7 +12238,7 @@ pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { transmute(a) } @@ -11938,7 +12248,7 @@ pub unsafe fn 
vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { transmute(a) } @@ -11948,7 +12258,7 @@ pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { +pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { transmute(a) } @@ -11958,7 +12268,7 @@ pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { +pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { transmute(a) } @@ -11968,7 +12278,7 @@ pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { +pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { transmute(a) } @@ -11978,7 +12288,7 @@ pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { +pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { transmute(a) } @@ -11988,7 +12298,7 @@ pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { +pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { transmute(a) } @@ -11998,7 +12308,7 @@ pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { +pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { transmute(a) } @@ -12008,7 +12318,7 @@ pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { +pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { transmute(a) } @@ -12018,7 +12328,7 @@ pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { +pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { transmute(a) } @@ -12028,7 +12338,7 @@ pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { +pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { transmute(a) } @@ -12038,7 +12348,7 @@ pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { +pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { transmute(a) } @@ -12048,7 +12358,7 @@ pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { +pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { transmute(a) } @@ -12058,7 +12368,7 @@ pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { transmute(a) } @@ -12068,7 +12378,7 @@ pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { transmute(a) } @@ -12078,7 +12388,7 @@ pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { transmute(a) } @@ -12088,7 +12398,7 @@ pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { +pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { transmute(a) } @@ -12098,7 +12408,7 @@ pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { +pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { transmute(a) } @@ -12108,7 +12418,7 @@ pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { transmute(a) } @@ -12118,7 +12428,7 @@ pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { +pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { transmute(a) } @@ -12128,7 +12438,7 @@ pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { +pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { transmute(a) } @@ -12138,7 +12448,7 @@ pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { +pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { transmute(a) } @@ -12148,7 +12458,7 @@ pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { +pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { transmute(a) } @@ -12158,7 +12468,7 @@ pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { +pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { transmute(a) } @@ -12168,7 +12478,7 @@ pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { +pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { transmute(a) } @@ -12178,7 +12488,7 @@ pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub 
unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { +pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { transmute(a) } @@ -12188,7 +12498,7 @@ pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { transmute(a) } @@ -12198,7 +12508,7 @@ pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { transmute(a) } @@ -12208,7 +12518,7 @@ pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { transmute(a) } @@ -12218,7 +12528,7 @@ pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { transmute(a) } @@ -12228,7 +12538,7 @@ pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { +pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { transmute(a) } @@ -12238,7 +12548,7 @@ pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { +pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { transmute(a) } @@ -12248,7 +12558,7 @@ pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { +pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { transmute(a) } @@ -12258,7 +12568,7 @@ pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { +pub unsafe fn 
vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { transmute(a) } @@ -12268,7 +12578,7 @@ pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { +pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { transmute(a) } @@ -12278,7 +12588,7 @@ pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { +pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { transmute(a) } @@ -12288,7 +12598,7 @@ pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { +pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { transmute(a) } @@ -12298,7 +12608,7 @@ pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { +pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { transmute(a) } @@ -12308,7 +12618,7 @@ pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { +pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { transmute(a) } @@ -12318,7 +12628,7 @@ pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { +pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { transmute(a) } @@ -12328,7 +12638,7 @@ pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { transmute(a) } @@ -12338,6626 +12648,7684 @@ pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { +pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { transmute(a) } -/// Signed 
rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")] - fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vrshl_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")] - fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vrshlq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")] - fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vrshl_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")] - fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vrshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { + transmute(a) } -/// Signed rounding shift left +/// 
Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")] - fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vrshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")] - fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vrshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")] - fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vrshl_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] -pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")] - fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vrshlq_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector 
reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")] - fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } -vrshl_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")] - fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } -vrshlq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")] - fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } -vrshl_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")] - fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } -vrshlq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// 
Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")] - fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vrshl_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")] - fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } -vrshlq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")] - fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } -vrshl_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + transmute(a) } -/// Unsigned rounding shift left +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] -pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")] - fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } -vrshlq_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + transmute(a) } -/// Signed rounding 
shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshl_s8(a, vdup_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshlq_s8(a, vdupq_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshl_s16(a, vdup_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshlq_s16(a, vdupq_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+    transmute(a)
 }
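// Illustrative sketch, not part of the patch: the vreinterpret_* intrinsics
// added on the `+` side are pure bit casts. No instruction is emitted; the
// same 64- or 128-bit register value is retyped with a different lane
// layout, which is why every body is just `transmute(a)`. A plain-Rust
// model of the idea on stack arrays (hypothetical helper name; the byte
// order shown assumes a little-endian target, the usual AArch64 case):
fn reinterpret_u32x2_as_u8x8(a: [u32; 2]) -> [u8; 8] {
    // [u32; 2] and [u8; 8] have the same size, so this mirrors `transmute(a)`.
    unsafe { core::mem::transmute(a) }
}
// reinterpret_u32x2_as_u8x8([1, 0]) == [1, 0, 0, 0, 0, 0, 0, 0]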
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshl_s32(a, vdup_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshlq_s32(a, vdupq_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshl_s64(a, vdup_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+    transmute(a)
 }
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshlq_s64(a, vdupq_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshl_u8(a, vdup_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+    transmute(a)
 }
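// Illustrative sketch, not part of the patch: the removed vrshr_n_* bodies
// show how the rounding right shift is derived from the rounding left
// shift, by shifting left with a broadcast -N. The hardware adds the
// rounding constant 2^(N-1) before shifting, so halfway values round up.
// One unsigned 8-bit lane modelled in scalar Rust (hypothetical helper;
// N restricted to 1..=8 like the static_assert! above):
fn rounding_shr_u8_lane(x: u8, n: u32) -> u8 {
    debug_assert!((1..=8).contains(&n));
    // Widen first so adding 2^(n-1) cannot overflow the lane.
    (((x as u16) + (1 << (n - 1))) >> n) as u8
}
// rounding_shr_u8_lane(7, 2) == 2, whereas the plain shift 7 >> 2 == 1.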
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshlq_u8(a, vdupq_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshl_u16(a, vdup_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshlq_u16(a, vdupq_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshl_u32(a, vdup_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshlq_u32(a, vdupq_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshl_u64(a, vdup_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshlq_u64(a, vdupq_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
-}
-
-/// Rounding shift right narrow
-#[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-vrshrn_n_s32_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-vrshrn_n_s64_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    transmute(vrshrn_n_s16::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+    transmute(a)
 }
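// Illustrative sketch, not part of the patch: vrshrn_n_* is the same
// rounding right shift followed by truncation to the half-width lane. The
// two removed bindings above differ per target: the arm build feeds the
// LLVM intrinsic a vector of -N, while aarch64 passes the immediate N
// directly; both compute the value modelled here for one i16 lane
// (hypothetical helper; the `as i8` cast keeps the low 8 bits, like the
// narrowing move):
fn rounding_shrn_s16_lane(x: i16, n: u32) -> i8 {
    debug_assert!((1..=8).contains(&n));
    // Widen, add the rounding constant 2^(n-1), shift, then truncate.
    (((x as i32) + (1 << (n - 1))) >> n) as i8
}
// rounding_shrn_s16_lane(-1, 1) == 0; rounding_shrn_s16_lane(509, 2) == 127.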
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    transmute(vrshrn_n_s32::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    transmute(a)
 }
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    transmute(vrshrn_n_s64::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vrshr_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vrshrq_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vrshr_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vrshrq_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vrshr_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vrshrq_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    transmute(a)
 }
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vrshr_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+    transmute(a)
 }
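// Illustrative sketch, not part of the patch: the removed vrsra_n_* bodies
// combine the two pieces above, spelled out as simd_add(a, vrshr_n_*::<N>(b)):
// rounding-shift each lane of `b` right by N, then accumulate into `a` with
// a wrapping lane addition. Scalar model of one unsigned 8-bit lane
// (hypothetical helper):
fn rsra_u8_lane(acc: u8, b: u8, n: u32) -> u8 {
    debug_assert!((1..=8).contains(&n));
    let shifted = (((b as u16) + (1 << (n - 1))) >> n) as u8;
    // Lane addition wraps on overflow, as SIMD integer adds do.
    acc.wrapping_add(shifted)
}
// rsra_u8_lane(255, 7, 2) == 1: 255 + 2 wraps around the 8-bit lane.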
-/// Signed rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vrshrq_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vrshr_n_u8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vrshrq_n_u8::<N>(b))
-}
-
-/// Unsigned rounding shift right and accumulate
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vrshr_n_u16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+    transmute(a)
 }
-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vrshrq_n_u16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+    transmute(a)
 }
uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_add(a, vrshr_n_u32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_add(a, vrshrq_n_u32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_add(a, vrshr_n_u64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")] + fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vrshl_s8_(a, b) } -/// Unsigned rounding shift right and accumulate +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_add(a, vrshrq_n_u64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")] + fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vrshlq_s8_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t 
{ - static_assert_imm3!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")] + fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vrshl_s16_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { - static_assert_imm2!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")] + fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vrshlq_s16_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { - static_assert_imm1!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")] + fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vrshl_s32_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { - static_assert!(LANE : i32 where LANE == 0); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")] + fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> 
int32x4_t; + } +vrshlq_s32_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { - static_assert_imm3!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")] + fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vrshl_s64_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { - static_assert_imm2!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")] + fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vrshlq_s64_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { - static_assert_imm1!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")] + fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vrshl_u8_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { - static_assert!(LANE : i32 where LANE == 0); - 
simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")] + fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vrshlq_u8_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { - static_assert_imm3!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")] + fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vrshl_u16_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { - static_assert_imm2!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")] + fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vrshlq_u16_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "crypto,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { - static_assert!(LANE : i32 where LANE == 0); - simd_insert(b, LANE as u32, a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] + #[cfg_attr(target_arch = 
"aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")] + fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vrshl_u32_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { - static_assert_imm4!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")] + fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vrshlq_u32_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { - static_assert_imm3!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")] + fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vrshl_u64_(a, b) } -/// Insert vector element from another vector element +/// Unsigned rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { - static_assert_imm2!(LANE); - simd_insert(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")] + fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vrshlq_u64_(a, b) } -/// Insert vector element from another vector element +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] 
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
-    static_assert_imm1!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshl_s8(a, vdup_n_s8((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
-    static_assert_imm4!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshlq_s8(a, vdupq_n_s8((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
-    static_assert_imm3!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshl_s16(a, vdup_n_s16((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
-    static_assert_imm2!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshlq_s16(a, vdupq_n_s16((-N).try_into().unwrap()))
 }
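
Reviewer note: the functions above implement a *rounding* shift right, i.e. each
lane computes (x + (1 << (N - 1))) >> N, in contrast to the truncating vshr_n_*
family. A minimal sketch of the difference, illustrative only and not part of
the patch (assumes an AArch64 target, where these intrinsics are re-exported
from core::arch::aarch64):

use core::arch::aarch64::{vdup_n_s16, vget_lane_s16, vrshr_n_s16, vshr_n_s16};

unsafe fn rounding_vs_truncating() {
    let v = vdup_n_s16(7);
    // Truncating shift: 7 >> 2 == 1.
    assert_eq!(vget_lane_s16::<0>(vshr_n_s16::<2>(v)), 1);
    // Rounding shift: (7 + (1 << 1)) >> 2 == 2.
    assert_eq!(vget_lane_s16::<0>(vrshr_n_s16::<2>(v)), 2);
}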

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
-    static_assert_imm1!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshl_s32(a, vdup_n_s32((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
-    static_assert_imm4!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshlq_s32(a, vdupq_n_s32((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
-    static_assert_imm3!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshl_s64(a, vdup_n_s64((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Signed rounding shift right
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "crypto,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
-    static_assert_imm1!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshlq_s64(a, vdupq_n_s64((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
-    static_assert_imm1!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshl_u8(a, vdup_n_s8((-N).try_into().unwrap()))
 }

-/// Insert vector element from another vector element
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
-    static_assert_imm2!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshlq_u8(a, vdupq_n_s8((-N).try_into().unwrap()))
 }

-/// Signed Shift left
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")]
-        fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vshl_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshl_u16(a, vdup_n_s16((-N).try_into().unwrap()))
 }

-/// Signed Shift left
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")]
-        fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vshlq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshlq_u16(a, vdupq_n_s16((-N).try_into().unwrap()))
 }

-/// Signed Shift left
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
"aarch64"), assert_instr(sshl))] -pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")] - fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vshl_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshl_u32(a, vdup_n_s32((-N).try_into().unwrap())) } -/// Signed Shift left +/// Unsigned rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] -pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")] - fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshlq_u32(a, vdupq_n_s32((-N).try_into().unwrap())) } -/// Signed Shift left +/// Unsigned rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] -pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")] - fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64((-N).try_into().unwrap())) } -/// Signed Shift left +/// Unsigned rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] -pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")] - fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] 
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshlq_u64(a, vdupq_n_s64((-N).try_into().unwrap()))
 }

-/// Signed Shift left
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")]
-        fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
+        fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
     }
-vshl_s64_(a, b)
+vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
 }

-/// Signed Shift left
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")]
-        fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")]
+        fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
     }
-vshlq_s64_(a, b)
+vrshrn_n_s16_(a, N)
 }
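
Reviewer note: vrshrn_n_* is deliberately defined twice. The ARM LLVM intrinsic
takes the shift as a vector of negated amounts, while the AArch64 one takes a
plain immediate, hence the two #[cfg]-gated bodies. The user-visible behaviour
is the same: shift each lane right by N with rounding, then narrow it to half
width. A usage sketch, illustrative only and not part of the patch:

use core::arch::aarch64::{vdupq_n_s16, vget_lane_s8, vrshrn_n_s16};

unsafe fn rounding_narrow() {
    // Each i16 lane holds 127; (127 + (1 << 3)) >> 4 == 8, narrowed to i8.
    let wide = vdupq_n_s16(127);
    let narrow = vrshrn_n_s16::<4>(wide);
    assert_eq!(vget_lane_s8::<0>(narrow), 8);
}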

-/// Unsigned Shift left
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")]
-        fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
+        fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
     }
-vshl_u8_(a, b)
+vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }

-/// Unsigned Shift left
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")]
-        fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")]
+        fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
     }
-vshlq_u8_(a, b)
+vrshrn_n_s32_(a, N)
 }

-/// Unsigned Shift left
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")]
-        fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
+        fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
     }
-vshl_u16_(a, b)
+vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 }

-/// Unsigned Shift left
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")]
-        fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")]
+        fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
     }
-vshlq_u16_(a, b)
+vrshrn_n_s64_(a, N)
 }

-/// Unsigned Shift left
+/// Rounding shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
= "aarch64"), assert_instr(ushl))] -pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")] - fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vshl_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + transmute(vrshrn_n_s16::(transmute(a))) } -/// Unsigned Shift left +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] -pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")] - fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } -vshlq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + transmute(vrshrn_n_s32::(transmute(a))) } -/// Unsigned Shift left +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] -pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")] - fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } -vshl_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + transmute(vrshrn_n_s64::(transmute(a))) } -/// Unsigned Shift left +/// Signed rounding shift right and accumulate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] -pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")] - fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } -vshlq_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] 

-/// Unsigned Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")]
-        fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-vshlq_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vrshr_n_s8::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vrshrq_n_s8::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vrshr_n_s16::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vrshrq_n_s16::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshr_n_s32::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshrq_n_s32::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshr_n_s64::<N>(b))
 }

-/// Shift left
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdup_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshrq_n_s64::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdupq_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vrshr_n_u8::<N>(b))
 }
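
Reviewer note: vrsra_n_* fuses the rounding shift right with an accumulate,
i.e. a + vrshr_n::<N>(b) per lane, matching the SRSRA/URSRA instructions. A
usage sketch, illustrative only and not part of the patch:

use core::arch::aarch64::{vdup_n_u8, vget_lane_u8, vrsra_n_u8};

unsafe fn shift_and_accumulate() {
    let acc = vdup_n_u8(1);
    let x = vdup_n_u8(7);
    // 1 + ((7 + (1 << 1)) >> 2) == 1 + 2 == 3 in every lane.
    assert_eq!(vget_lane_u8::<0>(vrsra_n_u8::<2>(acc, x)), 3);
}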

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdup_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vrshrq_n_u8::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vrshr_n_u16::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdup_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vrshrq_n_u16::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshr_n_u32::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshrq_n_u32::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshr_n_u64::<N>(b))
 }

-/// Shift left
+/// Unsigned rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdup_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshrq_n_u64::<N>(b))
 }

-/// Shift left
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdupq_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }
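
Reviewer note: the vset_lane_* functions moved here lower to simd_insert and
need no dedicated instruction in the tested configurations (hence
assert_instr(nop)); the LANE index is validated at compile time by the
static_assert macros. A usage sketch, illustrative only and not part of the
patch:

use core::arch::aarch64::{vdup_n_s8, vget_lane_s8, vset_lane_s8};

unsafe fn set_one_lane() {
    let v = vset_lane_s8::<3>(42, vdup_n_s8(0));
    assert_eq!(vget_lane_s8::<3>(v), 42);
    assert_eq!(vget_lane_s8::<0>(v), 0);
}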

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    simd_shl(simd_cast(a), vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    simd_shl(simd_cast(a), vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    simd_shl(simd_cast(a), vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    simd_insert(b, LANE as u32, a)
 }

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    simd_shl(simd_cast(a), vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }
-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Signed shift left long
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    simd_shl(simd_cast(a), vdupq_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "crypto,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
+    static_assert_imm4!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
+    static_assert_imm4!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdup_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdup_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
+    static_assert_imm4!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "crypto,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
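Note that, unlike the rest of the lane inserts, the two p64 variants above are gated on the crypto extensions ("neon,aes" on AArch64, "crypto,v8" on ARM), so callers have to enable those features as well. A hedged sketch:

#[target_feature(enable = "neon,aes")]
unsafe fn set_p64_lane(v: poly64x2_t) -> poly64x2_t {
    vsetq_lane_p64::<0>(1, v)
}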
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdup_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right
+/// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdupq_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")]
+        fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vshl_s8_(a, b)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")]
+        fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vshlq_s8_(a, b)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")]
+        fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vshl_s16_(a, b)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_cast(simd_shr(a, vdupq_n_u16(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")]
+        fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vshlq_s16_(a, b)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_u32(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")]
+        fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vshl_s32_(a, b)
 }

-/// Shift right narrow
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap())))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")]
+        fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vshlq_s32_(a, b)
 }

-/// Signed shift right and accumulate
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshr_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")]
+        fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vshl_s64_(a, b)
 }

-/// Signed shift right and accumulate
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshrq_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")]
+        fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vshlq_s64_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshr_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")]
+        fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vshl_u8_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshrq_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")]
+        fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vshlq_u8_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshr_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")]
+        fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vshl_u16_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshrq_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")]
+        fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vshlq_u16_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshr_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")]
+        fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vshl_u32_(a, b)
 }

-/// Signed shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshrq_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")]
+        fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+vshlq_u32_(a, b)
 }

-/// Unsigned shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshr_n_u8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")]
+        fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+vshl_u64_(a, b)
 }

-/// Unsigned shift right and accumulate
+/// Unsigned Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshrq_n_u8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")]
+        fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+vshlq_u64_(a, b)
 }
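One behavioural detail of the register forms above that the doc comments do not spell out: the shift-count vector b is per-lane and signed, and a negative count shifts right. A small sketch (illustrative only):

#[target_feature(enable = "neon")]
unsafe fn shl_reg_demo() -> int32x2_t {
    // Each lane computes 16 << -2, i.e. 16 >> 2 = 4.
    vshl_s32(vdup_n_s32(16), vdup_n_s32(-2))
}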
-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshr_n_u16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdup_n_s8(N.try_into().unwrap()))
 }

-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshrq_n_u16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdupq_n_s8(N.try_into().unwrap()))
 }

-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshr_n_u32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdup_n_s16(N.try_into().unwrap()))
 }

-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshrq_n_u32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdupq_n_s16(N.try_into().unwrap()))
 }

-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshr_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdup_n_s32(N.try_into().unwrap()))
 }

-/// Unsigned shift right and accumulate
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshrq_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdupq_n_s32(N.try_into().unwrap()))
 }

-/// Unsigned Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
-pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    let d: uint8x8_t = vabd_u8(b, c);
-    simd_add(a, simd_cast(d))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdup_n_u8(N.try_into().unwrap()))
 }

-/// Unsigned Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
-pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    let d: uint16x4_t = vabd_u16(b, c);
-    simd_add(a, simd_cast(d))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdupq_n_u8(N.try_into().unwrap()))
 }

-/// Unsigned Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
-pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    let d: uint32x2_t = vabd_u32(b, c);
-    simd_add(a, simd_cast(d))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdup_n_u16(N.try_into().unwrap()))
 }

-/// Signed Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
-pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    let d: int8x8_t = vabd_s8(b, c);
-    let e: uint8x8_t = simd_cast(d);
-    simd_add(a, simd_cast(e))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdupq_n_u16(N.try_into().unwrap()))
 }

-/// Signed Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
-pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    let d: int16x4_t = vabd_s16(b, c);
-    let e: uint16x4_t = simd_cast(d);
-    simd_add(a, simd_cast(e))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdup_n_u32(N.try_into().unwrap()))
 }

-/// Signed Absolute difference and Accumulate Long
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
-pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    let d: int32x2_t = vabd_s32(b, c);
-    let e: uint32x2_t = simd_cast(d);
-    simd_add(a, simd_cast(e))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdupq_n_u32(N.try_into().unwrap()))
 }

-/// Singned saturating Absolute value
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")]
-        fn vqabs_s8_(a: int8x8_t) -> int8x8_t;
-    }
-vqabs_s8_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdup_n_s64(N.try_into().unwrap()))
 }

-/// Singned saturating Absolute value
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")]
-        fn vqabsq_s8_(a: int8x16_t) -> int8x16_t;
-    }
-vqabsq_s8_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdupq_n_s64(N.try_into().unwrap()))
 }

-/// Singned saturating Absolute value
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")]
-        fn vqabs_s16_(a: int16x4_t) -> int16x4_t;
-    }
-vqabs_s16_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdup_n_u64(N.try_into().unwrap()))
 }

-/// Singned saturating Absolute value
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")]
-        fn vqabsq_s16_(a: int16x8_t) -> int16x8_t;
-    }
-vqabsq_s16_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdupq_n_u64(N.try_into().unwrap()))
 }
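For the immediate forms, N is validated against the lane width at compile time (static_assert_imm3! allows 0..=7 for 8-bit lanes, static_assert_imm6! allows 0..=63 for 64-bit lanes). A usage sketch, illustrative only:

#[target_feature(enable = "neon")]
unsafe fn shl_imm_demo(v: uint8x8_t) -> uint8x8_t {
    vshl_n_u8::<3>(v) // multiply every lane by 8; vshl_n_u8::<8> would not compile
}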
-/// Singned saturating Absolute value
+/// Signed shift left long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")]
-        fn vqabs_s32_(a: int32x2_t) -> int32x2_t;
-    }
-vqabs_s32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 0 && N <= 8);
+    simd_shl(simd_cast(a), vdupq_n_s16(N.try_into().unwrap()))
 }

-/// Singned saturating Absolute value
+/// Signed shift left long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")]
-        fn vqabsq_s32_(a: int32x4_t) -> int32x4_t;
-    }
-vqabsq_s32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 0 && N <= 16);
+    simd_shl(simd_cast(a), vdupq_n_s32(N.try_into().unwrap()))
 }

-#[cfg(test)]
-#[allow(overflowing_literals)]
-mod test {
-    use super::*;
-    use crate::core_arch::simd::*;
-    use std::mem::transmute;
-    use stdarch_test::simd_test;
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 0 && N <= 32);
+    simd_shl(simd_cast(a), vdupq_n_s64(N.try_into().unwrap()))
+}

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s8() {
-        let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-        let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
-        let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-        let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 0 && N <= 8);
+    simd_shl(simd_cast(a), vdupq_n_u16(N.try_into().unwrap()))
+}

-        let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-        let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 0 && N <= 16);
+    simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap()))
+}

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_s8() {
-        let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
-        let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
-        let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
-        let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 0 && N <= 32);
+    simd_shl(simd_cast(a), vdupq_n_u64(N.try_into().unwrap()))
+}
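Unlike vshl_n, the widening vshll_n forms accept a shift equal to the full source lane width (the static_assert above allows 0..=8 for 8-bit lanes) because the result lanes are twice as wide. A sketch, illustrative only:

#[target_feature(enable = "neon")]
unsafe fn shll_demo(v: uint8x8_t) -> uint16x8_t {
    // 0xFF becomes 0xFF00u16; no bits are lost in the widened result.
    vshll_n_u8::<8>(v)
}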
-        let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
-        let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
+}

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s16() {
-        let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
-        let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F);
-        let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
-        let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
+}

-        let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
-        let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00);
-        let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00);
-        let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b)));
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdup_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shr(a, vdup_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_shr(a, vdup_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdup_n_u16(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdupq_n_u16(N.try_into().unwrap()))
+}
+
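The plain vshr_n forms keep the lane width, while the vshrn_n narrowing shifts added further below shift and then truncate each lane to half width, which makes them convenient for extracting high halves. A sketch, illustrative only:

#[target_feature(enable = "neon")]
unsafe fn shrn_demo(v: uint16x8_t) -> uint8x8_t {
    // Shift each u16 lane right by 8 and keep the low byte: the high byte of each element.
    vshrn_n_u16::<8>(v)
}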
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdup_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdupq_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdup_n_u64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdupq_n_u64(N.try_into().unwrap())) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn 
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_cast(simd_shr(a, vdupq_n_u16(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_cast(simd_shr(a, vdupq_n_u32(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap())))
+}
+
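The narrowing variants shift and then truncate each lane to half width, which is why their `N` range stops at the narrow element size. A hedged sketch of the common "keep the high half" idiom (`high_bytes` is an illustrative name, assuming AArch64):

    use core::arch::aarch64::{int16x8_t, int8x8_t, vshrn_n_s16};

    // Shift each 16-bit lane right by 8 and truncate to i8, keeping the
    // high byte of every lane.
    unsafe fn high_bytes(v: int16x8_t) -> int8x8_t {
        vshrn_n_s16::<8>(v)
    }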
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshr_n_s8::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshrq_n_s8::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshr_n_s16::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshrq_n_s16::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshr_n_s32::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshrq_n_s32::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshr_n_s64::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshrq_n_s64::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshr_n_u8::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshrq_n_u8::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshr_n_u16::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshrq_n_u16::<N>(b))
+}
+
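`vsra_n_*` fuses the shift and the add of the `simd_add(a, vshr..._n_*::<N>(b))` expansion into a single SRA instruction, a common idiom for fixed-point rescaling. A sketch under the same AArch64 assumption (the helper name `accumulate_quarters` is illustrative):

    use core::arch::aarch64::{int16x8_t, vsraq_n_s16};

    // Lane-wise acc + (v >> 2), matching the expansion above but emitted
    // as one SSRA instruction.
    unsafe fn accumulate_quarters(acc: int16x8_t, v: int16x8_t) -> int16x8_t {
        vsraq_n_s16::<2>(acc, v)
    }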
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshr_n_u32::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshrq_n_u32::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshr_n_u64::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshrq_n_u64::<N>(b))
+}
+
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
+pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    let d: uint8x8_t = vabd_u8(b, c);
+    simd_add(a, simd_cast(d))
+}
+
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
+pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    let d: uint16x4_t = vabd_u16(b, c);
+    simd_add(a, simd_cast(d))
+}
+
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
+pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    let d: uint32x2_t = vabd_u32(b, c);
+    simd_add(a, simd_cast(d))
+}
+
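The widening accumulate is the inner step of a sum-of-absolute-differences kernel. A sketch, again assuming AArch64 (`sad_step` is an illustrative name, not from the patch):

    use core::arch::aarch64::{uint16x8_t, uint8x8_t, vabal_u8};

    // acc[i] + |a[i] - b[i]|, widened to u16 so eight u8 differences can
    // accumulate without intermediate overflow.
    unsafe fn sad_step(acc: uint16x8_t, a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
        vabal_u8(acc, a, b)
    }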
assert_instr("vabal.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + let d: int8x8_t = vabd_s8(b, c); + let e: uint8x8_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + let d: int16x4_t = vabd_s16(b, c); + let e: uint16x4_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + let d: int32x2_t = vabd_s32(b, c); + let e: uint32x2_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")] + fn vqabs_s8_(a: int8x8_t) -> int8x8_t; + } +vqabs_s8_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")] + fn vqabsq_s8_(a: int8x16_t) -> int8x16_t; + } +vqabsq_s8_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")] + fn vqabs_s16_(a: int16x4_t) -> int16x4_t; + } +vqabs_s16_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqabs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")] + fn vqabsq_s16_(a: int16x8_t) -> int16x8_t; + } +vqabsq_s16_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")] + fn vqabs_s32_(a: int32x2_t) -> int32x2_t; + } +vqabs_s32_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")] + fn vqabsq_s32_(a: int32x4_t) -> int32x4_t; + } +vqabsq_s32_(a) +} + +#[cfg(test)] +#[allow(overflowing_literals)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vand_s16(transmute(a), 
transmute(b))); + assert_eq!(r, e); + + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x0F, 0x0F); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x00); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x0F, 0x0F); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x00); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x0F); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x0F, 0x0F); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x00); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = 
u64x1::new(0x0F); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x0F, 0x0F); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x00); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 
0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 
0x0F); + let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(veorq_u32(transmute(a), 
transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(16, 15, 14, 13); + let e: i16x4 = i16x4::new(15, 13, 11, 9); + let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(16, 15); + let e: i32x2 = i32x2::new(15, 13); + let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(16, 15, 14, 13); + let e: i32x4 = i32x4::new(15, 13, 11, 9); + let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u8x8 = u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: u8x16 
= u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(16, 15, 14, 13); + let e: u16x4 = u16x4::new(15, 13, 11, 9); + let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(16, 15); + let e: u32x2 = u32x2::new(15, 13); + let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(16, 15, 14, 13); + let e: u32x4 = u32x4::new(15, 13, 11, 9); + let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(9.0, 3.0); + let e: f32x2 = f32x2::new(8.0, 1.0); + let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); + let b: f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); + let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); + let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + unsafe fn test_vabdl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: u16x8 = u16x8::new(9, 8, 7, 6, 6, 7, 8, 9); + let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(10, 10, 10, 10); + let e: u32x4 = u32x4::new(9, 8, 7, 6); + let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x0F, 0x0F); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + unsafe fn test_vabdl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(10, 10); + let e: u64x2 = u64x2::new(9, 8); + let r: u64x2 = transmute(vabdl_u32(transmute(a), 
transmute(b))); assert_eq!(r, e); + } - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x00); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); + let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + unsafe fn test_vabdl_s16() { + let a: i16x4 = i16x4::new(1, 2, 11, 12); + let b: i16x4 = i16x4::new(10, 10, 10, 10); + let e: i32x4 = i32x4::new(9, 8, 1, 2); + let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_s32() { + let a: i32x2 = i32x2::new(1, 11); + let b: i32x2 = i32x2::new(10, 10); + let e: i64x2 = i64x2::new(9, 1); + let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + unsafe fn test_vceq_u8() { + let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + unsafe fn test_vceqq_u8() { + let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 
0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); + let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + unsafe fn test_vceq_u16() { + let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + unsafe fn test_vceqq_u16() { + let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u32() { - let a: u32x2 = 
u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x0F, 0x0F); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + unsafe fn test_vceq_u32() { + let a: u32x2 = u32x2::new(0, 0x01); + let b: u32x2 = u32x2::new(0, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x00); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + let a: u32x2 = u32x2::new(0, 0); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + unsafe fn test_vceqq_u32() { + let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x0F); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + unsafe fn test_vceq_s8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x0F, 0x0F); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + unsafe fn test_vceqq_s8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x00); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x0F); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + unsafe fn test_vceq_s16() { + let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x0F, 0x0F); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + unsafe fn test_vceqq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x00); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + unsafe fn test_vceq_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x01); + let b: i32x2 = i32x2::new(-2147483648, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(-2147483648, -2147483648); + let b: i32x2 = i32x2::new(-2147483648, 
0x7F_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + unsafe fn test_vceqq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + unsafe fn test_vceq_p8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + unsafe fn test_vceqq_p8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_p8(transmute(a), 
transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_vorr_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + unsafe fn test_vceq_f32() { + let a: f32x2 = f32x2::new(1.2, 3.4); + let b: f32x2 = f32x2::new(1.2, 3.4); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + unsafe fn test_vceqq_f32() { + let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + unsafe fn test_vtst_s8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); + unsafe fn test_vtstq_s8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = 
transmute(vtstq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + unsafe fn test_vtst_s16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); + unsafe fn test_vtstq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + unsafe fn test_vtst_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x00); + let b: i32x2 = i32x2::new(-2147483648, 0x00); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + unsafe fn test_vtstq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vtstq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); + unsafe fn test_vtst_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); + unsafe fn test_vtstq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); + unsafe fn test_vtst_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); + unsafe fn test_vtstq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); + unsafe fn test_vtst_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); + unsafe fn test_vtstq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); + unsafe fn test_vtst_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let b: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn 
test_veor_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); + unsafe fn test_vtstq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); + unsafe fn test_vabs_f32() { + let a: f32x2 = f32x2::new(-0.1, -2.2); + let e: f32x2 = f32x2::new(0.1, 2.2); + let r: f32x2 = transmute(vabs_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); + unsafe fn test_vabsq_f32() { + let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); + let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); + let r: f32x4 = transmute(vabsq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); + unsafe fn test_vcgt_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); + unsafe fn test_vcgt_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = 
u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); + unsafe fn test_vcgtq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); + unsafe fn test_vcgt_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); + unsafe fn test_vcgtq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); + unsafe fn test_vcgt_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); + unsafe fn test_vcgtq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); + unsafe fn test_vcgt_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); + 
unsafe fn test_vcgtq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(16, 15, 14, 13); - let e: i16x4 = i16x4::new(15, 13, 11, 9); - let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); + unsafe fn test_vcgt_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); + unsafe fn test_vcgtq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(16, 15); - let e: i32x2 = i32x2::new(15, 13); - let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); + unsafe fn test_vcgt_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(16, 15, 14, 13); - let e: i32x4 = i32x4::new(15, 13, 11, 9); - let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); + unsafe fn test_vcgtq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u8x8 = u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); + unsafe fn test_vclt_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: u8x16 = u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); + unsafe fn test_vcltq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = 
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(16, 15, 14, 13); - let e: u16x4 = u16x4::new(15, 13, 11, 9); - let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); + unsafe fn test_vclt_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); + unsafe fn test_vcltq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(16, 15); - let e: u32x2 = u32x2::new(15, 13); - let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); + unsafe fn test_vclt_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(16, 15, 14, 13); - let e: u32x4 = u32x4::new(15, 13, 11, 9); - let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); + unsafe fn test_vcltq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(9.0, 3.0); - let e: f32x2 = f32x2::new(8.0, 1.0); - let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); + unsafe fn test_vclt_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); - let b: f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); - let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); - let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); + unsafe fn test_vcltq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); + unsafe fn test_vclt_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(10, 10, 10, 10); - let e: u32x4 = u32x4::new(9, 8, 7, 6); - let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); + unsafe fn test_vcltq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(10, 10); - let e: u64x2 = u64x2::new(9, 8); - let r: u64x2 = transmute(vabdl_u32(transmute(a), transmute(b))); + unsafe fn test_vclt_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); + unsafe fn test_vcltq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s16() { - let a: i16x4 = i16x4::new(1, 2, 11, 12); - let b: i16x4 = i16x4::new(10, 10, 10, 10); - let e: i32x4 = i32x4::new(9, 8, 1, 2); - let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); + unsafe fn test_vclt_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s32() { - let a: i32x2 = i32x2::new(1, 11); - let b: i32x2 = i32x2::new(10, 10); - let e: i64x2 = i64x2::new(9, 1); - let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); + unsafe fn test_vcltq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u8() { - let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); 
+ unsafe fn test_vcle_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u8() { - let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); - let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + unsafe fn test_vcleq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); - let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u16() { - let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + unsafe fn test_vcle_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u16() { - let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vcleq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u32() { - let a: 
u32x2 = u32x2::new(0, 0x01); - let b: u32x2 = u32x2::new(0, 0x01); + unsafe fn test_vcle_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x2 = u32x2::new(0, 0); - let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u32() { - let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + unsafe fn test_vcleq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vcle_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + unsafe fn test_vcleq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s16() { - let a: i16x4 = i16x4::new(-32768, 0x01, 
0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + unsafe fn test_vcle_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vcleq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x01); - let b: i32x2 = i32x2::new(-2147483648, 0x01); + unsafe fn test_vcle_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x2 = i32x2::new(-2147483648, -2147483648); - let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + unsafe fn test_vcleq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_p8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = 
i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + unsafe fn test_vcleq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_p8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + unsafe fn test_vcgeq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_f32() { - let a: f32x2 = f32x2::new(1.2, 3.4); - let b: f32x2 = f32x2::new(1.2, 3.4); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); + unsafe fn test_vcge_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_f32() { - let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); - let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); + unsafe fn test_vcgeq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s8() { - 
let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); + unsafe fn test_vcge_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_s8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); + unsafe fn test_vcge_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); + unsafe fn test_vcgeq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgeq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x00); - let b: i32x2 = i32x2::new(-2147483648, 0x00); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); + unsafe fn test_vcge_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = 
transmute(vtstq_s32(transmute(a), transmute(b))); + unsafe fn test_vcgeq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_p8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); + unsafe fn test_vcge_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_p8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); + unsafe fn test_vcgeq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); + unsafe fn test_vcge_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); + unsafe fn test_vcgeq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); + unsafe fn test_vcls_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0, 7, 7, 7, 
7, 7, 7, 7); + let r: i8x8 = transmute(vcls_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_u16(transmute(a), transmute(b))); + unsafe fn test_vclsq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F); + let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: i8x16 = transmute(vclsq_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let b: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); + unsafe fn test_vcls_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00); + let e: i16x4 = i16x4::new(0, 15, 15, 15); + let r: i16x4 = transmute(vcls_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); + unsafe fn test_vclsq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclsq_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabs_f32() { - let a: f32x2 = f32x2::new(-0.1, -2.2); - let e: f32x2 = f32x2::new(0.1, 2.2); - let r: f32x2 = transmute(vabs_f32(transmute(a))); + unsafe fn test_vcls_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 31); + let r: i32x2 = transmute(vcls_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabsq_f32() { - let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); - let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); - let r: f32x4 = transmute(vabsq_f32(transmute(a))); + unsafe fn test_vclsq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00); + let e: i32x4 = i32x4::new(0, 31, 31, 31); + let r: i32x4 = transmute(vclsq_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); + unsafe fn test_vclz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vclz_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); + unsafe fn test_vclzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F); + let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1); + let r: i8x16 = transmute(vclzq_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); + unsafe fn test_vclz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: i16x4 = i16x4::new(0, 0, 16, 15); + let r: i16x4 = transmute(vclz_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); + unsafe fn test_vclzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclzq_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); + unsafe fn test_vclz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vclz_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); + unsafe fn test_vclzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: i32x4 = i32x4::new(0, 0, 32, 31); + let r: i32x4 = transmute(vclzq_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); + unsafe fn test_vclz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); + let r: u8x8 = transmute(vclz_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); + unsafe fn test_vclzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); + let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: u8x16 = transmute(vclzq_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 
= u16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); + unsafe fn test_vclz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); + let e: u16x4 = u16x4::new(16, 16, 15, 15); + let r: u16x4 = transmute(vclz_u16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); + unsafe fn test_vclzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); + let r: u16x8 = transmute(vclzq_u16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); + unsafe fn test_vclz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(32, 32); + let r: u32x2 = transmute(vclz_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); + unsafe fn test_vclzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); + let e: u32x4 = u32x4::new(32, 32, 31, 31); + let r: u32x4 = transmute(vclzq_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_f32() { - let a: f32x2 = f32x2::new(1.2, 2.3); - let b: f32x2 = f32x2::new(0.1, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); + unsafe fn test_vcagt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_f32() { - let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); + unsafe fn test_vcagtq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); + unsafe fn test_vcage_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - 
unsafe fn test_vcltq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); + unsafe fn test_vcageq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); + unsafe fn test_vcalt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); + unsafe fn test_vcaltq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); + unsafe fn test_vcale_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); + unsafe fn test_vcaleq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); + unsafe fn test_vcreate_s8() { + let a: u64 = 1; + let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vcreate_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); + unsafe fn test_vcreate_s32() { + let a: u64 = 1; + let e: i32x2 = i32x2::new(1, 0); + let r: i32x2 = transmute(vcreate_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); + unsafe fn test_vcreate_s64() { + let a: u64 = 1; + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vcreate_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); + unsafe fn test_vcreate_u8() { + let a: u64 = 1; + let e: u8x8 = u8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vcreate_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); + unsafe fn test_vcreate_u32() { + let a: u64 = 1; + let e: u32x2 = u32x2::new(1, 0); + let r: u32x2 = transmute(vcreate_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); + unsafe fn test_vcreate_u64() { + let a: u64 = 1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcreate_u64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclt_f32() { - let a: f32x2 = f32x2::new(0.1, 1.2); - let b: f32x2 = f32x2::new(1.2, 2.3); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); + unsafe fn test_vcreate_p8() { + let a: u64 = 1; + let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vcreate_p8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcltq_f32() { - let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); + unsafe fn test_vcreate_p16() { + let a: u64 = 1; + let e: i16x4 = i16x4::new(1, 0, 0, 0); + let r: i16x4 = transmute(vcreate_p16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); + unsafe fn test_vcreate_p64() { + let a: u64 = 1; + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vcreate_p64(transmute(a))); 
assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); + unsafe fn test_vcreate_f32() { + let a: u64 = 0; + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vcreate_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); + unsafe fn test_vcvt_f32_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: f32x2 = f32x2::new(1., 2.); + let r: f32x2 = transmute(vcvt_f32_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); + unsafe fn test_vcvtq_f32_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vcvtq_f32_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); + unsafe fn test_vcvt_f32_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: f32x2 = f32x2::new(1., 2.); + let r: f32x2 = transmute(vcvt_f32_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); + unsafe fn test_vcvtq_f32_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vcvtq_f32_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); + unsafe fn test_vcvt_n_f32_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: f32x2 = f32x2::new(0.25, 0.5); + let r: f32x2 = transmute(vcvt_n_f32_s32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); + unsafe fn test_vcvtq_n_f32_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(0.25, 0.5, 
0.75, 1.); + let r: f32x4 = transmute(vcvtq_n_f32_s32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); + unsafe fn test_vcvt_n_f32_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: f32x2 = f32x2::new(0.25, 0.5); + let r: f32x2 = transmute(vcvt_n_f32_u32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); + unsafe fn test_vcvtq_n_f32_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let r: f32x4 = transmute(vcvtq_n_f32_u32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); + unsafe fn test_vcvt_n_s32_f32() { + let a: f32x2 = f32x2::new(0.25, 0.5); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vcvt_n_s32_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); + unsafe fn test_vcvtq_n_s32_f32() { + let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vcvtq_n_s32_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcle_f32() { - let a: f32x2 = f32x2::new(0.1, 1.2); - let b: f32x2 = f32x2::new(1.2, 2.3); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); + unsafe fn test_vcvt_n_u32_f32() { + let a: f32x2 = f32x2::new(0.25, 0.5); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvt_n_u32_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcleq_f32() { - let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); + unsafe fn test_vcvtq_n_u32_f32() { + let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vcvtq_n_u32_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); + unsafe fn test_vcvt_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-1, 2); + let r: i32x2 = transmute(vcvt_s32_f32(transmute(a))); assert_eq!(r, e); } 
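// A minimal scalar model of the vcvt_n_* tests above, assuming the const
// generic N selects the number of fraction bits: the int-to-float direction
// divides by 2^N and the float-to-int direction multiplies by 2^N (rounding
// aside). These helpers are illustrative only, not part of stdarch.
fn cvt_n_f32_s32_model(x: i32, n: i32) -> f32 {
    x as f32 / 2f32.powi(n)
}
fn cvt_n_s32_f32_model(x: f32, n: i32) -> i32 {
    (x * 2f32.powi(n)) as i32
}
// cvt_n_f32_s32_model(1, 2) == 0.25 and cvt_n_s32_f32_model(0.25, 2) == 1,
// matching test_vcvt_n_f32_s32 and test_vcvt_n_s32_f32 above.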
#[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); + unsafe fn test_vcvtq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-1, 2, -2, 3); + let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); + unsafe fn test_vcvt_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvt_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); + unsafe fn test_vcvtq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 2, 3); + let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); + unsafe fn test_vdup_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); + unsafe fn test_vdupq_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); + unsafe fn test_vdup_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = 
transmute(vcgeq_u8(transmute(a), transmute(b))); + unsafe fn test_vdupq_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); + unsafe fn test_vdup_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); + unsafe fn test_vdupq_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); + unsafe fn test_vdup_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); + unsafe fn test_vdup_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcge_f32() { - let a: f32x2 = f32x2::new(1.2, 2.3); - let b: f32x2 = f32x2::new(0.1, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b))); + unsafe fn test_vdup_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_f32() { - let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcls_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7); - let r: i8x8 = transmute(vcls_s8(transmute(a))); + unsafe fn 
test_vdupq_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F); - let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); - let r: i8x16 = transmute(vclsq_s8(transmute(a))); + unsafe fn test_vdupq_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcls_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00); - let e: i16x4 = i16x4::new(0, 15, 15, 15); - let r: i16x4 = transmute(vcls_s16(transmute(a))); + unsafe fn test_vdup_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15); - let r: i16x8 = transmute(vclsq_s16(transmute(a))); + unsafe fn test_vdupq_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcls_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: i32x2 = i32x2::new(0, 31); - let r: i32x2 = transmute(vcls_s32(transmute(a))); + unsafe fn test_vdup_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00); - let e: i32x4 = i32x4::new(0, 31, 31, 31); - let r: i32x4 = transmute(vclsq_s32(transmute(a))); + unsafe fn test_vdupq_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclz_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7); - let r: i8x8 = transmute(vclz_s8(transmute(a))); + unsafe fn test_vdup_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F); - let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1); - let r: i8x16 = transmute(vclzq_s8(transmute(a))); + unsafe fn test_vdupq_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclz_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); - let 
e: i16x4 = i16x4::new(0, 0, 16, 15); - let r: i16x4 = transmute(vclz_s16(transmute(a))); + unsafe fn test_vdup_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15); - let r: i16x8 = transmute(vclzq_s16(transmute(a))); + unsafe fn test_vdup_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclz_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vclz_s32(transmute(a))); + unsafe fn test_vdup_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: i32x4 = i32x4::new(0, 0, 32, 31); - let r: i32x4 = transmute(vclzq_s32(transmute(a))); + unsafe fn test_vdupq_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclz_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); - let r: u8x8 = transmute(vclz_u8(transmute(a))); + unsafe fn test_vdupq_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); - let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); - let r: u8x16 = transmute(vclzq_u8(transmute(a))); + unsafe fn test_vdupq_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclz_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); - let e: u16x4 = u16x4::new(16, 16, 15, 15); - let r: u16x4 = transmute(vclz_u16(transmute(a))); + unsafe fn test_vdup_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); - let r: u16x8 = transmute(vclzq_u16(transmute(a))); + unsafe fn test_vdupq_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a))); assert_eq!(r, e); } 
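// A minimal scalar model of the vdup/vdupq lane tests in this hunk: each
// intrinsic broadcasts the single element selected by the LANE const generic
// into every output lane, so e.g. vdup_lane_s8::<4> on (1, 1, 1, 4, 1, 6, 7, 8)
// yields all ones. This helper is illustrative only, not part of stdarch.
fn dup_lane_model<const LANE: usize, const N: usize>(a: [i8; N]) -> [i8; N] {
    [a[LANE]; N]
}
// dup_lane_model::<4, 8>([1, 1, 1, 4, 1, 6, 7, 8]) == [1; 8], matching
// test_vdup_lane_s8 above.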
#[simd_test(enable = "neon")] - unsafe fn test_vclz_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(32, 32); - let r: u32x2 = transmute(vclz_u32(transmute(a))); + unsafe fn test_vdup_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); - let e: u32x4 = u32x4::new(32, 32, 31, 31); - let r: u32x4 = transmute(vclzq_u32(transmute(a))); + unsafe fn test_vdupq_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcagt_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); + unsafe fn test_vdup_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcagtq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); + unsafe fn test_vdup_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcage_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcageq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcalt_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_laneq_s64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcaltq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); - let r: u32x4 = 
transmute(vcaltq_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_lane_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcale_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_laneq_u64() { + let a: u64x2 = u64x2::new(1, 1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcaleq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b))); + unsafe fn test_vdupq_lane_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s8() { - let a: u64 = 1; - let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vcreate_s8(transmute(a))); + unsafe fn test_vdup_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s32() { - let a: u64 = 1; - let e: i32x2 = i32x2::new(1, 0); - let r: i32x2 = transmute(vcreate_s32(transmute(a))); + unsafe fn test_vdupq_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s64() { - let a: u64 = 1; - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vcreate_s64(transmute(a))); + unsafe fn test_vdup_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u8() { - let a: u64 = 1; - let e: u8x8 = u8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vcreate_u8(transmute(a))); + unsafe fn test_vdupq_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u32() { - let a: u64 = 1; - let e: u32x2 = u32x2::new(1, 0); - let r: u32x2 = transmute(vcreate_u32(transmute(a))); + unsafe fn test_vdup_lane_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u64() { - let a: u64 = 1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcreate_u64(transmute(a))); + unsafe fn test_vdup_lane_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_p8() { - let a: u64 = 1; - let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = 
transmute(vcreate_p8(transmute(a))); + unsafe fn test_vdup_laneq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_p16() { - let a: u64 = 1; - let e: i16x4 = i16x4::new(1, 0, 0, 0); - let r: i16x4 = transmute(vcreate_p16(transmute(a))); + unsafe fn test_vdup_laneq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_p64() { - let a: u64 = 1; - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vcreate_p64(transmute(a))); + unsafe fn test_vext_s8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcreate_f32() { - let a: u64 = 0; - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vcreate_f32(transmute(a))); + unsafe fn test_vextq_s8() { + let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vcvt_f32_s32(transmute(a))); + unsafe fn test_vext_s16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f32_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(1., 2., 3., 4.); - let r: f32x4 = transmute(vcvtq_f32_s32(transmute(a))); + unsafe fn test_vextq_s16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vcvt_f32_u32(transmute(a))); + unsafe fn test_vext_s32() { + let a: i32x2 = i32x2::new(0, 8); + let b: i32x2 = i32x2::new(9, 11); + let e: i32x2 = i32x2::new(8, 9); + let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f32_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(1., 2., 3., 4.); - let r: f32x4 = transmute(vcvtq_f32_u32(transmute(a))); + unsafe fn test_vextq_s32() { + let a: i32x4 = i32x4::new(0, 8, 8, 9); + let b: i32x4 = i32x4::new(9, 11, 14, 15); + let e: i32x4 = i32x4::new(8, 9, 9, 11); + let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f32_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: f32x2 = f32x2::new(0.25, 0.5); - let r: 
f32x2 = transmute(vcvt_n_f32_s32::<2>(transmute(a))); + unsafe fn test_vext_u8() { + let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f32_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let r: f32x4 = transmute(vcvtq_n_f32_s32::<2>(transmute(a))); + unsafe fn test_vextq_u8() { + let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f32_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: f32x2 = f32x2::new(0.25, 0.5); - let r: f32x2 = transmute(vcvt_n_f32_u32::<2>(transmute(a))); + unsafe fn test_vext_u16() { + let a: u16x4 = u16x4::new(0, 8, 8, 9); + let b: u16x4 = u16x4::new(9, 11, 14, 15); + let e: u16x4 = u16x4::new(8, 9, 9, 11); + let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f32_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let r: f32x4 = transmute(vcvtq_n_f32_u32::<2>(transmute(a))); + unsafe fn test_vextq_u16() { + let a: u16x8 = u16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_s32_f32() { - let a: f32x2 = f32x2::new(0.25, 0.5); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vcvt_n_s32_f32::<2>(transmute(a))); + unsafe fn test_vext_u32() { + let a: u32x2 = u32x2::new(0, 8); + let b: u32x2 = u32x2::new(9, 11); + let e: u32x2 = u32x2::new(8, 9); + let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_s32_f32() { - let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_n_s32_f32::<2>(transmute(a))); + unsafe fn test_vextq_u32() { + let a: u32x4 = u32x4::new(0, 8, 8, 9); + let b: u32x4 = u32x4::new(9, 11, 14, 15); + let e: u32x4 = u32x4::new(8, 9, 9, 11); + let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_u32_f32() { - let a: f32x2 = f32x2::new(0.25, 0.5); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvt_n_u32_f32::<2>(transmute(a))); + unsafe fn test_vext_p8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_u32_f32() { - let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_n_u32_f32::<2>(transmute(a))); + unsafe fn test_vextq_p8() { + let a: 
i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_p8::<8>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 2); - let r: i32x2 = transmute(vcvt_s32_f32(transmute(a))); + unsafe fn test_vext_p16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 2, -2, 3); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a))); + unsafe fn test_vextq_p16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvt_u32_f32(transmute(a))); + unsafe fn test_vextq_s64() { + let a: i64x2 = i64x2::new(0, 8); + let b: i64x2 = i64x2::new(9, 11); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 2, 3); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a))); + unsafe fn test_vextq_u64() { + let a: u64x2 = u64x2::new(0, 8); + let b: u64x2 = u64x2::new(9, 11); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a))); + unsafe fn test_vext_f32() { + let a: f32x2 = f32x2::new(0., 2.); + let b: f32x2 = f32x2::new(3., 4.); + let e: f32x2 = f32x2::new(2., 3.); + let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a))); + unsafe fn test_vextq_f32() { + let a: f32x4 = f32x4::new(0., 2., 2., 3.); + let b: f32x4 = f32x4::new(3., 4., 5., 6.); + let e: f32x4 = f32x4::new(2., 3., 3., 4.); + let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a))); + unsafe fn test_vmla_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + 
let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a))); + unsafe fn test_vmlaq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a))); + unsafe fn test_vmla_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a))); + unsafe fn test_vmlaq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a))); + unsafe fn test_vmla_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(3, 3); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a))); + unsafe fn test_vmlaq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 3, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a))); + unsafe fn test_vmla_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s8() { - 
let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a))); + unsafe fn test_vmlaq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a))); + unsafe fn test_vmla_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(3, 3, 3, 3); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a))); + unsafe fn test_vmlaq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a))); + unsafe fn test_vmla_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(3, 3); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a))); + unsafe fn test_vmlaq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(3, 3, 3, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16x4 = u16x4::new(1, 1, 1, 1); - let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a))); + unsafe fn test_vmla_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(3., 3.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a))); + unsafe fn test_vmlaq_f32() { + 
let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(3., 3., 3., 3.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32x2 = u32x2::new(1, 1); - let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a))); + unsafe fn test_vmla_n_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32x4 = u32x4::new(1, 1, 1, 1); - let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a))); + unsafe fn test_vmlaq_n_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16 = 3; + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a))); + unsafe fn test_vmla_n_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16x4 = u16x4::new(1, 1, 1, 1); - let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a))); + unsafe fn test_vmlaq_n_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32 = 3; + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32x2 = u32x2::new(1, 1); - let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a))); + unsafe fn test_vmla_n_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a))); + unsafe fn test_vmlaq_n_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16 = 3; + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u16x8 = 
transmute(vdupq_lane_u16::<2>(transmute(a))); + unsafe fn test_vmla_n_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32x4 = u32x4::new(1, 1, 1, 1); - let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a))); + unsafe fn test_vmlaq_n_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32 = 3; + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a))); + unsafe fn test_vmla_n_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32 = 3.; + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a))); + unsafe fn test_vmlaq_n_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32 = 3.; + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a))); + unsafe fn test_vmla_lane_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a))); + unsafe fn test_vmla_laneq_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a))); + unsafe fn test_vmlaq_lane_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } 
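// A minimal scalar model of the vmla*_lane_* tests above and below: every
// vmla variant computes the multiply-accumulate a + b * c element-wise, and
// the _lane_/_laneq_ forms first broadcast lane LANE of c. This helper is
// illustrative only, not part of stdarch.
fn mla_lane_model<const LANE: usize>(a: [i16; 4], b: [i16; 4], c: [i16; 4]) -> [i16; 4] {
    let s = c[LANE];
    [a[0] + b[0] * s, a[1] + b[1] * s, a[2] + b[2] * s, a[3] + b[3] * s]
}
// mla_lane_model::<1>([0, 1, 2, 3], [2; 4], [0, 3, 0, 0]) == [6, 7, 8, 9],
// matching test_vmla_lane_s16 above.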
#[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a))); + unsafe fn test_vmlaq_laneq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a))); + unsafe fn test_vmla_lane_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a))); + unsafe fn test_vmla_laneq_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); + unsafe fn test_vmlaq_lane_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); + unsafe fn test_vmlaq_laneq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); + unsafe fn test_vmla_lane_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); + unsafe fn test_vmla_laneq_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x4 = 
u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); + unsafe fn test_vmlaq_lane_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); + unsafe fn test_vmlaq_laneq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); + unsafe fn test_vmla_lane_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); + unsafe fn test_vmla_laneq_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); + unsafe fn test_vmlaq_lane_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u64() { - let a: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); + unsafe fn test_vmlaq_laneq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); + unsafe fn test_vmla_lane_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: 
f32x2 = f32x2::new(2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_u64() {
-        let a: u64x2 = u64x2::new(0, 1);
-        let e: u64x1 = u64x1::new(1);
-        let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a)));
+    unsafe fn test_vmla_laneq_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_s8() {
-        let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11);
-        let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19);
-        let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15);
-        let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b)));
+    unsafe fn test_vmlaq_lane_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vextq_s8() {
-        let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15);
-        let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11);
-        let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19);
-        let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b)));
+    unsafe fn test_vmlaq_laneq_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_s16() {
-        let a: i16x4 = i16x4::new(0, 8, 8, 9);
-        let b: i16x4 = i16x4::new(9, 11, 14, 15);
-        let e: i16x4 = i16x4::new(8, 9, 9, 11);
-        let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_s8() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vextq_s16() {
-        let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11);
-        let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19);
-        let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15);
-        let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(3, 3, 3, 3);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

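The `vmlal_*` tests in this stretch cover the widening form: each product of the two narrow operands is computed at twice the element width and accumulated into a wide vector, so `i16x4` operands update an `i32x4` accumulator and the multiply itself cannot overflow. A safe-Rust model (helper name ours, not part of the patch):

// Sketch only: widening multiply-accumulate, i16 lanes into an i32 accumulator.
fn mlal_s16_model(a: [i32; 4], b: [i16; 4], c: [i16; 4]) -> [i32; 4] {
    let mut r = [0i32; 4];
    for i in 0..4 {
        r[i] = a[i].wrapping_add((b[i] as i32) * (c[i] as i32)); // widen first, then multiply
    }
    r
}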
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_s32() {
-        let a: i32x2 = i32x2::new(0, 8);
-        let b: i32x2 = i32x2::new(9, 11);
-        let e: i32x2 = i32x2::new(8, 9);
-        let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_s32() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(3, 3);
+        let e: i64x2 = i64x2::new(6, 7);
+        let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vextq_s32() {
-        let a: i32x4 = i32x4::new(0, 8, 8, 9);
-        let b: i32x4 = i32x4::new(9, 11, 14, 15);
-        let e: i32x4 = i32x4::new(8, 9, 9, 11);
-        let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_u8() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_u8() {
-        let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11);
-        let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19);
-        let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15);
-        let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_u16() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(3, 3, 3, 3);
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vextq_u8() {
-        let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15);
-        let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11);
-        let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19);
-        let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_u32() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(3, 3);
+        let e: u64x2 = u64x2::new(6, 7);
+        let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_u16() {
-        let a: u16x4 = u16x4::new(0, 8, 8, 9);
-        let b: u16x4 = u16x4::new(9, 11, 14, 15);
-        let e: u16x4 = u16x4::new(8, 9, 9, 11);
-        let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_n_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vextq_u16() {
-        let a: u16x8 = u16x8::new(0, 8, 8, 9, 8, 9, 9, 11);
-        let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19);
-        let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15);
-        let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_n_s32() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32 = 3;
+        let e: i64x2 = i64x2::new(6, 7);
+        let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_u32() {
-        let a: u32x2 = u32x2::new(0, 8);
-        let b: u32x2 = u32x2::new(9, 11);
-        let e: u32x2 = u32x2::new(8, 9);
-        let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vmlal_n_u16() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+ let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u32() { - let a: u32x4 = u32x4::new(0, 8, 8, 9); - let b: u32x4 = u32x4::new(9, 11, 14, 15); - let e: u32x4 = u32x4::new(8, 9, 9, 11); - let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_n_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_p8() { - let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_p8() { - let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); - let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); - let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); - let r: i8x16 = transmute(vextq_p8::<8>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_p16() { - let a: i16x4 = i16x4::new(0, 8, 8, 9); - let b: i16x4 = i16x4::new(9, 11, 14, 15); - let e: i16x4 = i16x4::new(8, 9, 9, 11); - let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_p16() { - let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_s64() { - let a: i64x2 = i64x2::new(0, 8); - let b: i64x2 = i64x2::new(9, 11); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = 
transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u64() { - let a: u64x2 = u64x2::new(0, 8); - let b: u64x2 = u64x2::new(9, 11); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_f32() { - let a: f32x2 = f32x2::new(0., 2.); - let b: f32x2 = f32x2::new(3., 4.); - let e: f32x2 = f32x2::new(2., 3.); - let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_f32() { - let a: f32x4 = f32x4::new(0., 2., 2., 3.); - let b: f32x4 = f32x4::new(3., 4., 5., 6.); - let e: f32x4 = f32x4::new(2., 3., 3., 4.); - let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmls_s8() { + let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); + let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + unsafe fn test_vmlsq_s8() { + let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); + let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_s16(transmute(a), 
transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s32() { - let a: i32x2 = i32x2::new(0, 1); + unsafe fn test_vmls_s32() { + let a: i32x2 = i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x2 = i32x2::new(3, 3); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); let c: i32x4 = i32x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmls_u8() { + let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); + let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + unsafe fn test_vmlsq_u8() { + let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); + let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u16() { - let 
a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u32() { - let a: u32x2 = u32x2::new(0, 1); + unsafe fn test_vmls_u32() { + let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x2 = u32x2::new(3, 3); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); let c: u32x4 = u32x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_f32() { - let a: f32x2 = f32x2::new(0., 1.); + unsafe fn test_vmls_f32() { + let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); let c: f32x2 = f32x2::new(3., 3.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); + unsafe fn test_vmlsq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); let c: f32x4 = f32x4::new(3., 3., 3., 3.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_n_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16 = 3; - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_n_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i16 = 3; - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = 
transmute(vmlsq_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_s32() { - let a: i32x2 = i32x2::new(0, 1); + unsafe fn test_vmls_n_s32() { + let a: i32x2 = i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32 = 3; - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_n_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); let c: i32 = 3; - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_n_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16 = 3; - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_n_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u16 = 3; - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_u32() { - let a: u32x2 = u32x2::new(0, 1); + unsafe fn test_vmls_n_u32() { + let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32 = 3; - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_n_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); let c: u32 = 3; - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_f32() { - let a: f32x2 = f32x2::new(0., 1.); + unsafe fn test_vmls_n_f32() { + let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); let c: f32 = 3.; - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), transmute(c))); 
assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); + unsafe fn test_vmlsq_n_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); let c: f32 = 3.; - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_lane_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_laneq_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_lane_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_laneq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_s32() { - let a: i32x2 = i32x2::new(0, 1); + unsafe fn test_vmls_lane_s32() { + let a: i32x2 = i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x2 = i32x2::new(0, 3); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_s32() { - let a: i32x2 = i32x2::new(0, 1); + unsafe fn test_vmls_laneq_s32() { + let a: i32x2 = 
i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_lane_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); let c: i32x2 = i32x2::new(0, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_laneq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_lane_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_laneq_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_lane_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_laneq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 
= transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_u32() { - let a: u32x2 = u32x2::new(0, 1); + unsafe fn test_vmls_lane_u32() { + let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x2 = u32x2::new(0, 3); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_u32() { - let a: u32x2 = u32x2::new(0, 1); + unsafe fn test_vmls_laneq_u32() { + let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_lane_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); let c: u32x2 = u32x2::new(0, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_laneq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_f32() { - let a: f32x2 = f32x2::new(0., 1.); + unsafe fn test_vmls_lane_f32() { + let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); let c: f32x2 = f32x2::new(0., 3.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_f32() { - let a: f32x2 = f32x2::new(0., 1.); + unsafe fn test_vmls_laneq_f32() { + let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); + unsafe fn test_vmlsq_lane_f32() { + let 
a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); let c: f32x2 = f32x2::new(0., 3.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); + unsafe fn test_vmlsq_laneq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s8() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsl_s8() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsl_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s32() { - let a: i64x2 = i64x2::new(0, 1); + unsafe fn test_vmlsl_s32() { + let a: i64x2 = i64x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x2 = i32x2::new(3, 3); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c))); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u8() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsl_u8() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsl_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = 
u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u32() { - let a: u64x2 = u64x2::new(0, 1); + unsafe fn test_vmlsl_u32() { + let a: u64x2 = u64x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x2 = u32x2::new(3, 3); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c))); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_n_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16 = 3; - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_s32() { - let a: i64x2 = i64x2::new(0, 1); + unsafe fn test_vmlsl_n_s32() { + let a: i64x2 = i64x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32 = 3; - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), transmute(c))); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_n_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16 = 3; - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_u32() { - let a: u64x2 = u64x2::new(0, 1); + unsafe fn test_vmlsl_n_u32() { + let a: u64x2 = u64x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32 = 3; - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), transmute(c))); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_lane_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_laneq_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: 
i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_s32() { - let a: i64x2 = i64x2::new(0, 1); + unsafe fn test_vmlsl_lane_s32() { + let a: i64x2 = i64x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x2 = i32x2::new(0, 3); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_s32() { - let a: i64x2 = i64x2::new(0, 1); + unsafe fn test_vmlsl_laneq_s32() { + let a: i64x2 = i64x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_lane_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsl_laneq_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_u32() { - let a: u64x2 = u64x2::new(0, 1); + unsafe fn test_vmlsl_lane_u32() { + let a: u64x2 = u64x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x2 = u32x2::new(0, 3); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_u32() { - let a: u64x2 = u64x2::new(0, 1); + unsafe fn test_vmlsl_laneq_u32() { + let a: u64x2 = u64x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_s8() { - let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = 
transmute(vmls_s8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vneg_s8() {
+        let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4);
+        let e: i8x8 = i8x8::new(0, -1, 1, -2, 2, -3, 3, -4);
+        let r: i8x8 = transmute(vneg_s8(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_s8() {
-        let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
-        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
-        let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
-        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vnegq_s8() {
+        let a: i8x16 = i8x16::new(0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8);
+        let e: i8x16 = i8x16::new(0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8);
+        let r: i8x16 = transmute(vnegq_s8(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_s16() {
-        let a: i16x4 = i16x4::new(6, 7, 8, 9);
-        let b: i16x4 = i16x4::new(2, 2, 2, 2);
-        let c: i16x4 = i16x4::new(3, 3, 3, 3);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vneg_s16() {
+        let a: i16x4 = i16x4::new(0, 1, -1, 2);
+        let e: i16x4 = i16x4::new(0, -1, 1, -2);
+        let r: i16x4 = transmute(vneg_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vnegq_s16() {
+        let a: i16x8 = i16x8::new(0, 1, -1, 2, -2, 3, -3, 4);
+        let e: i16x8 = i16x8::new(0, -1, 1, -2, 2, -3, 3, -4);
+        let r: i16x8 = transmute(vnegq_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vneg_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i32x2 = i32x2::new(0, -1);
+        let r: i32x2 = transmute(vneg_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vnegq_s32() {
+        let a: i32x4 = i32x4::new(0, 1, -1, 2);
+        let e: i32x4 = i32x4::new(0, -1, 1, -2);
+        let r: i32x4 = transmute(vnegq_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vneg_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let e: f32x2 = f32x2::new(0., -1.);
+        let r: f32x2 = transmute(vneg_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_s16() {
-        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
-        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
-        let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vnegq_f32() {
+        let a: f32x4 = f32x4::new(0., 1., -1., 2.);
+        let e: f32x4 = f32x4::new(0., -1., 1., -2.);
+        let r: f32x4 = transmute(vnegq_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_s32() {
-        let a: i32x2 = i32x2::new(6, 7);
-        let b: i32x2 = i32x2::new(2, 2);
-        let c: i32x2 = i32x2::new(3, 3);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqneg_s8() {
+        let a: i8x8 = i8x8::new(-128, 0, 1, -1, 2, -2, 3, -3);
+        let e: i8x8 = i8x8::new(0x7F, 0, -1, 1, -2, 2, -3, 3);
+        let r: i8x8 = transmute(vqneg_s8(transmute(a)));
         assert_eq!(r, e);
     }

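The vqneg expectation of `0x7F` in lane 0 above is the whole point of the `q` (saturating) variant: negating `i8::MIN` is not representable, so `vneg` wraps back to `-128` while `vqneg` clamps to `i8::MAX`. A one-lane model in safe Rust (function name illustrative, not part of the patch):

// Sketch only: plain vs. saturating negation at the type minimum.
fn neg_vs_qneg_i8(x: i8) -> (i8, i8) {
    (x.wrapping_neg(), x.checked_neg().unwrap_or(i8::MAX))
}

#[test]
fn qneg_saturates_at_type_min() {
    assert_eq!(neg_vs_qneg_i8(-128), (-128, 127));
}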
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_s32() {
-        let a: i32x4 = i32x4::new(6, 7, 8, 9);
-        let b: i32x4 = i32x4::new(2, 2, 2, 2);
-        let c: i32x4 = i32x4::new(3, 3, 3, 3);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqnegq_s8() {
+        let a: i8x16 = i8x16::new(-128, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7);
+        let e: i8x16 = i8x16::new(0x7F, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
+        let r: i8x16 = transmute(vqnegq_s8(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_u8() {
-        let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
-        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
-        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
-        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqneg_s16() {
+        let a: i16x4 = i16x4::new(-32768, 0, 1, -1);
+        let e: i16x4 = i16x4::new(0x7F_FF, 0, -1, 1);
+        let r: i16x4 = transmute(vqneg_s16(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_u8() {
-        let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
-        let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
-        let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
-        let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqnegq_s16() {
+        let a: i16x8 = i16x8::new(-32768, 0, 1, -1, 2, -2, 3, -3);
+        let e: i16x8 = i16x8::new(0x7F_FF, 0, -1, 1, -2, 2, -3, 3);
+        let r: i16x8 = transmute(vqnegq_s16(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_u16() {
-        let a: u16x4 = u16x4::new(6, 7, 8, 9);
-        let b: u16x4 = u16x4::new(2, 2, 2, 2);
-        let c: u16x4 = u16x4::new(3, 3, 3, 3);
-        let e: u16x4 = u16x4::new(0, 1, 2, 3);
-        let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqneg_s32() {
+        let a: i32x2 = i32x2::new(-2147483648, 0);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
+        let r: i32x2 = transmute(vqneg_s32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_u16() {
-        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
-        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
-        let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqnegq_s32() {
+        let a: i32x4 = i32x4::new(-2147483648, 0, 1, -1);
+        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, -1, 1);
+        let r: i32x4 = transmute(vqnegq_s32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_u32() {
-        let a: u32x2 = u32x2::new(6, 7);
-        let b: u32x2 = u32x2::new(2, 2);
-        let c: u32x2 = u32x2::new(3, 3);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqsub_u8() {
+        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34);
+        let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

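The `vqsub_*` tests use operands far from the boundary, so the defining property is easy to miss: the `q` forms clamp the difference to the representable range instead of wrapping. For the unsigned variants, Rust's `saturating_sub` models this per lane:

// Sketch only: unsigned saturating subtraction clamps at zero.
#[test]
fn qsub_u8_clamps_at_zero() {
    assert_eq!(3u8.saturating_sub(5), 0);   // a plain wrapping `-` would give 254
    assert_eq!(42u8.saturating_sub(1), 41); // matches the expected values above
}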
u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); + let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(3., 3.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(41, 40, 39, 38); + let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(3., 3., 3., 3.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: u16x8 = transmute(vqsubq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(41, 40); + let r: u32x2 = transmute(vqsub_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16 = 3; - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_n_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(41, 40, 39, 38); + let r: u32x4 = transmute(vqsubq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_u64() { + let a: u64x1 = u64x1::new(42); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(41); + let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32 = 3; - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_u64() { + let 
a: u64x2 = u64x2::new(42, 42); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(41, 40); + let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: i8x8 = transmute(vqsub_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16 = 3; - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); + let r: i8x16 = transmute(vqsubq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_n_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(41, 40, 39, 38); + let r: i16x4 = transmute(vqsub_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32 = 3; - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: i16x8 = transmute(vqsubq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32 = 3.; - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(41, 40); + let r: i32x2 = transmute(vqsub_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32 = 3.; - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(41, 40, 39, 38); + let r: i32x4 = 
transmute(vqsubq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsub_s64() { + let a: i64x1 = i64x1::new(42); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(41); + let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqsubq_s64() { + let a: i64x2 = i64x2::new(42, 42); + let b: i64x2 = i64x2::new(1, 2); + let e: i64x2 = i64x2::new(41, 40); + let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); + let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(21, 22, 22, 23); + let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 
4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(21, 22); + let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(21, 22, 22, 23); + let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); + let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(21, 22, 22, 23); + let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = 
transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(21, 22); + let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vhaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(21, 22, 22, 23); + let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: u8x8 = transmute(vrhadd_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); + let r: u8x16 = transmute(vrhaddq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(22, 22, 23, 23); + let r: u16x4 = transmute(vrhadd_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = 
transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: u16x8 = transmute(vrhaddq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(22, 22); + let r: u32x2 = transmute(vrhadd_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(22, 22, 23, 23); + let r: u32x4 = transmute(vrhaddq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s8() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsl_s8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: i8x8 = transmute(vrhadd_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); + let r: i8x16 = transmute(vrhaddq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(22, 22, 23, 23); + let r: i16x4 = transmute(vrhadd_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u8() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 
2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsl_u8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: i16x8 = transmute(vrhaddq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(22, 22); + let r: i32x2 = transmute(vrhadd_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrhaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(22, 22, 23, 23); + let r: i32x4 = transmute(vrhaddq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndn_f32() { + let a: f32x2 = f32x2::new(-1.5, 0.5); + let e: f32x2 = f32x2::new(-2.0, 0.0); + let r: f32x2 = transmute(vrndn_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndnq_f32() { + let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); + let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); + let r: f32x4 = transmute(vrndnq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let 
e: u8x16 = u8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); + let r: u8x16 = transmute(vqaddq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(43, 44, 45, 46); + let r: u16x4 = transmute(vqadd_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: u16x8 = transmute(vqaddq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(43, 44); + let r: u32x2 = transmute(vqadd_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(43, 44, 45, 46); + let r: u32x4 = transmute(vqaddq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqadd_u64() { + let a: u64x1 = u64x1::new(42); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(43); + let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqaddq_u64() { + let a: u64x2 = u64x2::new(42, 42); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(43, 44); + let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } 
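    // Editorial aside (illustrative sketch, not part of the patch): the expected
    // vectors in the surrounding vqadd/vqsub tests follow from scalar saturating
    // arithmetic applied lane by lane. The helper below is a hypothetical
    // reference model using Rust's `saturating_add`; it is not stdarch code,
    // but it reproduces what `vqadd_u8` computes per lane.
    fn scalar_vqadd_u8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
        let mut r = [0u8; 8];
        for i in 0..8 {
            // Each lane clamps at u8::MAX (255) instead of wrapping on overflow.
            r[i] = a[i].saturating_add(b[i]);
        }
        r
    }
    // For the inputs used above, scalar_vqadd_u8([42; 8], [1, 2, 3, 4, 5, 6, 7, 8])
    // returns [43, 44, 45, 46, 47, 48, 49, 50], matching the `e` vector of
    // test_vqadd_u8; the vqsub tests are the analogous model with `saturating_sub`.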
#[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: i8x8 = transmute(vqadd_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vqaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); + let r: i8x16 = transmute(vqaddq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vneg_s8() { - let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4); - let e: i8x8 = i8x8::new(0, -1, 1, -2, 2, -3, 3, -4); - let r: i8x8 = transmute(vneg_s8(transmute(a))); + unsafe fn test_vqadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(43, 44, 45, 46); + let r: i16x4 = transmute(vqadd_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s8() { - let a: i8x16 = i8x16::new(0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8); - let e: i8x16 = i8x16::new(0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8); - let r: i8x16 = transmute(vnegq_s8(transmute(a))); + unsafe fn test_vqaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: i16x8 = transmute(vqaddq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vneg_s16() { - let a: i16x4 = i16x4::new(0, 1, -1, 2); - let e: i16x4 = i16x4::new(0, -1, 1, -2); - let r: i16x4 = transmute(vneg_s16(transmute(a))); + unsafe fn test_vqadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(43, 44); + let r: i32x2 = transmute(vqadd_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s16() { - let a: i16x8 = i16x8::new(0, 1, -1, 2, -2, 3, -3, 4); - let e: i16x8 = i16x8::new(0, -1, 1, -2, 2, -3, 3, -4); - let r: i16x8 = transmute(vnegq_s16(transmute(a))); + unsafe fn test_vqaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(43, 44, 45, 46); + let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vneg_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: i32x2 = i32x2::new(0, -1); - let r: i32x2 = transmute(vneg_s32(transmute(a))); + unsafe fn test_vqadd_s64() { + let a: i64x1 = i64x1::new(42); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(43); + let r: i64x1 = 
transmute(vqadd_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s32() { - let a: i32x4 = i32x4::new(0, 1, -1, 2); - let e: i32x4 = i32x4::new(0, -1, 1, -2); - let r: i32x4 = transmute(vnegq_s32(transmute(a))); + unsafe fn test_vqaddq_s64() { + let a: i64x2 = i64x2::new(42, 42); + let b: i64x2 = i64x2::new(1, 2); + let e: i64x2 = i64x2::new(43, 44); + let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vneg_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let e: f32x2 = f32x2::new(0., -1.); - let r: f32x2 = transmute(vneg_f32(transmute(a))); + unsafe fn test_vld1_s8_x2() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x8; 2] = transmute(vld1_s8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vnegq_f32() { - let a: f32x4 = f32x4::new(0., 1., -1., 2.); - let e: f32x4 = f32x4::new(0., -1., 1., -2.); - let r: f32x4 = transmute(vnegq_f32(transmute(a))); + unsafe fn test_vld1_s16_x2() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)]; + let r: [i16x4; 2] = transmute(vld1_s16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s8() { - let a: i8x8 = i8x8::new(-128, 0, 1, -1, 2, -2, 3, -3); - let e: i8x8 = i8x8::new(0x7F, 0, -1, 1, -2, 2, -3, 3); - let r: i8x8 = transmute(vqneg_s8(transmute(a))); + unsafe fn test_vld1_s32_x2() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(3, 4)]; + let r: [i32x2; 2] = transmute(vld1_s32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s8() { - let a: i8x16 = i8x16::new(-128, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7); - let e: i8x16 = i8x16::new(0x7F, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); - let r: i8x16 = transmute(vqnegq_s8(transmute(a))); + unsafe fn test_vld1_s64_x2() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; + let r: [i64x1; 2] = transmute(vld1_s64_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s16() { - let a: i16x4 = i16x4::new(-32768, 0, 1, -1); - let e: i16x4 = i16x4::new(0x7F_FF, 0, -1, 1); - let r: i16x4 = transmute(vqneg_s16(transmute(a))); + unsafe fn test_vld1q_s8_x2() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 2] = transmute(vld1q_s8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s16() { - let a: i16x8 = i16x8::new(-32768, 0, 1, -1, 2, -2, 3, -3); - let e: i16x8 = i16x8::new(0x7F_FF, 0, -1, 1, -2, 2, -3, 3); - let r: i16x8 = transmute(vqnegq_s16(transmute(a))); + unsafe fn test_vld1q_s16_x2() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i16x8; 2] = transmute(vld1q_s16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = 
"neon")] - unsafe fn test_vqneg_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0); - let r: i32x2 = transmute(vqneg_s32(transmute(a))); + unsafe fn test_vld1q_s32_x2() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32x4; 2] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8)]; + let r: [i32x4; 2] = transmute(vld1q_s32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0, 1, -1); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, -1, 1); - let r: i32x4 = transmute(vqnegq_s32(transmute(a))); + unsafe fn test_vld1q_s64_x2() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(3, 4)]; + let r: [i64x2; 2] = transmute(vld1q_s64_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b))); + unsafe fn test_vld1_s8_x3() { + let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i8x8; 3] = transmute(vld1_s8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); - let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b))); + unsafe fn test_vld1_s16_x3() { + let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)]; + let r: [i16x4; 3] = transmute(vld1_s16_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(41, 40, 39, 38); - let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b))); + unsafe fn test_vld1_s32_x3() { + let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6)]; + let r: [i32x2; 3] = transmute(vld1_s32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: u16x8 = transmute(vqsubq_u16(transmute(a), transmute(b))); + unsafe fn test_vld1_s64_x3() { + let a: [i64; 4] = [0, 1, 2, 3]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(3)]; + let r: [i64x1; 3] = transmute(vld1_s64_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(41, 40); - let r: u32x2 = transmute(vqsub_u32(transmute(a), transmute(b))); + unsafe fn test_vld1q_s8_x3() { + let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x16; 3] = transmute(vld1q_s8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(41, 40, 39, 38); - let r: u32x4 = transmute(vqsubq_u32(transmute(a), transmute(b))); + unsafe fn test_vld1q_s16_x3() { + let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i16x8; 3] = transmute(vld1q_s16_x3(a[1..].as_ptr())); assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u64() { - let a: u64x1 = u64x1::new(42); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(41); - let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b))); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x3() { + let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i32x4; 3] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12)]; + let r: [i32x4; 3] = transmute(vld1q_s32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u64() { - let a: u64x2 = u64x2::new(42, 42); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(41, 40); - let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b))); + unsafe fn test_vld1q_s64_x3() { + let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)]; + let r: [i64x2; 3] = transmute(vld1q_s64_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: i8x8 = transmute(vqsub_s8(transmute(a), transmute(b))); + unsafe fn test_vld1_s8_x4() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x8; 4] = transmute(vld1_s8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); - let r: i8x16 = transmute(vqsubq_s8(transmute(a), transmute(b))); + unsafe fn test_vld1_s16_x4() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; + let r: [i16x4; 4] = 
transmute(vld1_s16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(41, 40, 39, 38); - let r: i16x4 = transmute(vqsub_s16(transmute(a), transmute(b))); + unsafe fn test_vld1_s32_x4() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6), i32x2::new(7, 8)]; + let r: [i32x2; 4] = transmute(vld1_s32_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: i16x8 = transmute(vqsubq_s16(transmute(a), transmute(b))); + unsafe fn test_vld1_s64_x4() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(3), i64x1::new(4)]; + let r: [i64x1; 4] = transmute(vld1_s64_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(41, 40); - let r: i32x2 = transmute(vqsub_s32(transmute(a), transmute(b))); + unsafe fn test_vld1q_s8_x4() { + let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 4] = transmute(vld1q_s8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(41, 40, 39, 38); - let r: i32x4 = transmute(vqsubq_s32(transmute(a), transmute(b))); + unsafe fn test_vld1q_s16_x4() { + let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i16x8; 4] = transmute(vld1q_s16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s64() { - let a: i64x1 = i64x1::new(42); - let b: i64x1 = i64x1::new(1); - let e: i64x1 = i64x1::new(41); - let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b))); + unsafe fn test_vld1q_s32_x4() { + let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i32x4; 4] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12), i32x4::new(13, 14, 15, 16)]; + let r: [i32x4; 4] = transmute(vld1q_s32_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s64() { - let a: i64x2 = i64x2::new(42, 42); - let b: i64x2 = i64x2::new(1, 2); - let e: i64x2 = i64x2::new(41, 40); - let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b))); 
+ unsafe fn test_vld1q_s64_x4() { + let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6), i64x2::new(7, 8)]; + let r: [i64x2; 4] = transmute(vld1q_s64_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b))); + unsafe fn test_vld1_u8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8x8; 2] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u8x8; 2] = transmute(vld1_u8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); - let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b))); + unsafe fn test_vld1_u16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16x4; 2] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8)]; + let r: [u16x4; 2] = transmute(vld1_u16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(21, 22, 22, 23); - let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b))); + unsafe fn test_vld1_u32_x2() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(3, 4)]; + let r: [u32x2; 2] = transmute(vld1_u32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b))); + unsafe fn test_vld1_u64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; + let r: [u64x1; 2] = transmute(vld1_u64_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(21, 22); - let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b))); + unsafe fn test_vld1q_u8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x16; 2] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x16; 2] = transmute(vld1q_u8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(21, 22, 22, 23); - let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b))); + unsafe fn test_vld1q_u16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16x8; 2] = [u16x8::new(1, 2, 3, 4, 5, 6, 
7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u16x8; 2] = transmute(vld1q_u16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b))); + unsafe fn test_vld1q_u32_x2() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32x4; 2] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8)]; + let r: [u32x4; 2] = transmute(vld1q_u32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); - let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b))); + unsafe fn test_vld1q_u64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(3, 4)]; + let r: [u64x2; 2] = transmute(vld1q_u64_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(21, 22, 22, 23); - let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b))); + unsafe fn test_vld1_u8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8x8; 3] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [u8x8; 3] = transmute(vld1_u8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b))); + unsafe fn test_vld1_u16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16x4; 3] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12)]; + let r: [u16x4; 3] = transmute(vld1_u16_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(21, 22); - let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b))); + unsafe fn test_vld1_u32_x3() { + let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6)]; + let r: [u32x2; 3] = transmute(vld1_u32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(21, 22, 22, 23); - let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b))); + unsafe fn test_vld1_u64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(3)]; + let r: [u64x1; 3] = transmute(vld1_u64_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 
42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: u8x8 = transmute(vrhadd_u8(transmute(a), transmute(b))); + unsafe fn test_vld1q_u8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8x16; 3] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u8x16; 3] = transmute(vld1q_u8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); - let r: u8x16 = transmute(vrhaddq_u8(transmute(a), transmute(b))); + unsafe fn test_vld1q_u16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16x8; 3] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [u16x8; 3] = transmute(vld1q_u16_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(22, 22, 23, 23); - let r: u16x4 = transmute(vrhadd_u16(transmute(a), transmute(b))); + unsafe fn test_vld1q_u32_x3() { + let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u32x4; 3] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12)]; + let r: [u32x4; 3] = transmute(vld1q_u32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: u16x8 = transmute(vrhaddq_u16(transmute(a), transmute(b))); + unsafe fn test_vld1q_u64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6)]; + let r: [u64x2; 3] = transmute(vld1q_u64_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(22, 22); - let r: u32x2 = transmute(vrhadd_u32(transmute(a), transmute(b))); + unsafe fn test_vld1_u8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x8; 4] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24), u8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x8; 4] = transmute(vld1_u8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(22, 22, 23, 23); - let r: u32x4 = transmute(vrhaddq_u32(transmute(a), transmute(b))); + 
unsafe fn test_vld1_u16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16x4; 4] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12), u16x4::new(13, 14, 15, 16)]; + let r: [u16x4; 4] = transmute(vld1_u16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: i8x8 = transmute(vrhadd_s8(transmute(a), transmute(b))); + unsafe fn test_vld1_u32_x4() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6), u32x2::new(7, 8)]; + let r: [u32x2; 4] = transmute(vld1_u32_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); - let r: i8x16 = transmute(vrhaddq_s8(transmute(a), transmute(b))); + unsafe fn test_vld1_u64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(3), u64x1::new(4)]; + let r: [u64x1; 4] = transmute(vld1_u64_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(22, 22, 23, 23); - let r: i16x4 = transmute(vrhadd_s16(transmute(a), transmute(b))); + unsafe fn test_vld1q_u8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x16; 4] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x16; 4] = transmute(vld1q_u8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: i16x8 = transmute(vrhaddq_s16(transmute(a), transmute(b))); + unsafe fn test_vld1q_u16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16x8; 4] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24), u16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u16x8; 4] = transmute(vld1q_u16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(22, 22); - let r: i32x2 = transmute(vrhadd_s32(transmute(a), transmute(b))); + unsafe fn test_vld1q_u32_x4() { + let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16]; + let e: [u32x4; 4] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12), u32x4::new(13, 14, 15, 16)]; + let r: [u32x4; 4] = transmute(vld1q_u32_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(22, 22, 23, 23); - let r: i32x4 = transmute(vrhaddq_s32(transmute(a), transmute(b))); + unsafe fn test_vld1q_u64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6), u64x2::new(7, 8)]; + let r: [u64x2; 4] = transmute(vld1q_u64_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrndn_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndn_f32(transmute(a))); + unsafe fn test_vld1_p8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x8; 2] = transmute(vld1_p8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrndnq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); - let r: f32x4 = transmute(vrndnq_f32(transmute(a))); + unsafe fn test_vld1_p8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i8x8; 3] = transmute(vld1_p8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b))); + unsafe fn test_vld1_p8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x8; 4] = transmute(vld1_p8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); - let r: u8x16 = transmute(vqaddq_u8(transmute(a), transmute(b))); + unsafe fn test_vld1q_p8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 2] = transmute(vld1q_p8_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: 
u16x4 = u16x4::new(43, 44, 45, 46); - let r: u16x4 = transmute(vqadd_u16(transmute(a), transmute(b))); + unsafe fn test_vld1q_p8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x16; 3] = transmute(vld1q_p8_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: u16x8 = transmute(vqaddq_u16(transmute(a), transmute(b))); + unsafe fn test_vld1q_p8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 4] = transmute(vld1q_p8_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(43, 44); - let r: u32x2 = transmute(vqadd_u32(transmute(a), transmute(b))); + unsafe fn test_vld1_p16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)]; + let r: [i16x4; 2] = transmute(vld1_p16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(43, 44, 45, 46); - let r: u32x4 = transmute(vqaddq_u32(transmute(a), transmute(b))); + unsafe fn test_vld1_p16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)]; + let r: [i16x4; 3] = transmute(vld1_p16_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u64() { - let a: u64x1 = u64x1::new(42); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(43); - let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b))); + unsafe fn test_vld1_p16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; + let r: [i16x4; 4] = transmute(vld1_p16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u64() { - let a: u64x2 = u64x2::new(42, 42); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(43, 44); - let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b))); + unsafe fn test_vld1q_p16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i16x8; 2] = transmute(vld1q_p16_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: i8x8 = transmute(vqadd_s8(transmute(a), transmute(b))); + unsafe fn test_vld1q_p16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i16x8; 3] = transmute(vld1q_p16_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); - let r: i8x16 = transmute(vqaddq_s8(transmute(a), transmute(b))); + unsafe fn test_vld1q_p16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i16x8; 4] = transmute(vld1q_p16_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(43, 44, 45, 46); - let r: i16x4 = transmute(vqadd_s16(transmute(a), transmute(b))); + unsafe fn test_vld1_f32_x2() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(3., 4.)]; + let r: [f32x2; 2] = transmute(vld1_f32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: i16x8 = transmute(vqaddq_s16(transmute(a), transmute(b))); + unsafe fn test_vld1q_f32_x2() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32x4; 2] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.)]; + let r: [f32x4; 2] = transmute(vld1q_f32_x2(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(43, 44); - let r: i32x2 = transmute(vqadd_s32(transmute(a), transmute(b))); + unsafe fn test_vld1_f32_x3() { + let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.)]; + let r: [f32x2; 3] = transmute(vld1_f32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(43, 44, 45, 46); - let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b))); + unsafe fn test_vld1q_f32_x3() { + let 
a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let e: [f32x4; 3] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.)]; + let r: [f32x4; 3] = transmute(vld1q_f32_x3(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s64() { - let a: i64x1 = i64x1::new(42); - let b: i64x1 = i64x1::new(1); - let e: i64x1 = i64x1::new(43); - let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b))); + unsafe fn test_vld1_f32_x4() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.), f32x2::new(7., 8.)]; + let r: [f32x2; 4] = transmute(vld1_f32_x4(a[1..].as_ptr())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s64() { - let a: i64x2 = i64x2::new(42, 42); - let b: i64x2 = i64x2::new(1, 2); - let e: i64x2 = i64x2::new(43, 44); - let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b))); + unsafe fn test_vld1q_f32_x4() { + let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let e: [f32x4; 4] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.), f32x4::new(13., 14., 15., 16.)]; + let r: [f32x4; 4] = transmute(vld1q_f32_x4(a[1..].as_ptr())); assert_eq!(r, e); } diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 9ed9f77aa7..118db0f31f 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -92,6 +92,16 @@ pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); #[derive(Copy, Clone)] pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); +/// ARM-specific type containing two `int8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); +/// ARM-specific type containing three `int8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); +/// ARM-specific type containing four `int8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); + /// ARM-specific type containing two `uint8x8_t` vectors. #[derive(Copy, Clone)] pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); @@ -102,6 +112,21 @@ pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); #[derive(Copy, Clone)] pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); +/// ARM-specific type containing two `uint8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); +/// ARM-specific type containing three `uint8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); +/// ARM-specific type containing four `uint8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct uint8x16x4_t( + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, +); + /// ARM-specific type containing two `poly8x8_t` vectors. #[derive(Copy, Clone)] pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); @@ -112,6 +137,231 @@ pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); #[derive(Copy, Clone)] pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); +/// ARM-specific type containing two `poly8x16_t` vectors. 
+#[derive(Copy, Clone)] +pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); +/// ARM-specific type containing three `poly8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); +/// ARM-specific type containing four `poly8x16_t` vectors. +#[derive(Copy, Clone)] +pub struct poly8x16x4_t( + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, +); + +/// ARM-specific type containing two `int16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); +/// ARM-specific type containing three `int16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t); +/// ARM-specific type containing four `int16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_t); + +/// ARM-specific type containing two `int16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t); +/// ARM-specific type containing three `int16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t); +/// ARM-specific type containing four `int16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_t); + +/// ARM-specific type containing two `uint16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t); +/// ARM-specific type containing three `uint16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x4x3_t(pub uint16x4_t, pub uint16x4_t, pub uint16x4_t); +/// ARM-specific type containing four `uint16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x4x4_t( + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, +); + +/// ARM-specific type containing two `uint16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t); +/// ARM-specific type containing three `uint16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x8x3_t(pub uint16x8_t, pub uint16x8_t, pub uint16x8_t); +/// ARM-specific type containing four `uint16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct uint16x8x4_t( + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, +); + +/// ARM-specific type containing two `poly16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t); +/// ARM-specific type containing three `poly16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x4x3_t(pub poly16x4_t, pub poly16x4_t, pub poly16x4_t); +/// ARM-specific type containing four `poly16x4_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x4x4_t( + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, +); + +/// ARM-specific type containing two `poly16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t); +/// ARM-specific type containing three `poly16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x8x3_t(pub poly16x8_t, pub poly16x8_t, pub poly16x8_t); +/// ARM-specific type containing four `poly16x8_t` vectors. +#[derive(Copy, Clone)] +pub struct poly16x8x4_t( + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, +); + +/// ARM-specific type containing two `int32x2_t` vectors. +#[derive(Copy, Clone)] +pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t); +/// ARM-specific type containing three `int32x2_t` vectors. 
+#[derive(Copy, Clone)]
+pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t);
+/// ARM-specific type containing four `int32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_t);
+
+/// ARM-specific type containing two `int32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t);
+/// ARM-specific type containing three `int32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t);
+/// ARM-specific type containing four `int32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_t);
+
+/// ARM-specific type containing two `uint32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t);
+/// ARM-specific type containing three `uint32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x2x3_t(pub uint32x2_t, pub uint32x2_t, pub uint32x2_t);
+/// ARM-specific type containing four `uint32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x2x4_t(
+    pub uint32x2_t,
+    pub uint32x2_t,
+    pub uint32x2_t,
+    pub uint32x2_t,
+);
+
+/// ARM-specific type containing two `uint32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t);
+/// ARM-specific type containing three `uint32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x4x3_t(pub uint32x4_t, pub uint32x4_t, pub uint32x4_t);
+/// ARM-specific type containing four `uint32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint32x4x4_t(
+    pub uint32x4_t,
+    pub uint32x4_t,
+    pub uint32x4_t,
+    pub uint32x4_t,
+);
+
+/// ARM-specific type containing two `float32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t);
+/// ARM-specific type containing three `float32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x2x3_t(pub float32x2_t, pub float32x2_t, pub float32x2_t);
+/// ARM-specific type containing four `float32x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x2x4_t(
+    pub float32x2_t,
+    pub float32x2_t,
+    pub float32x2_t,
+    pub float32x2_t,
+);
+
+/// ARM-specific type containing two `float32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t);
+/// ARM-specific type containing three `float32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x4x3_t(pub float32x4_t, pub float32x4_t, pub float32x4_t);
+/// ARM-specific type containing four `float32x4_t` vectors.
+#[derive(Copy, Clone)]
+pub struct float32x4x4_t(
+    pub float32x4_t,
+    pub float32x4_t,
+    pub float32x4_t,
+    pub float32x4_t,
+);
+
+/// ARM-specific type containing two `int64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t);
+/// ARM-specific type containing three `int64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t);
+/// ARM-specific type containing four `int64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_t);
+
+/// ARM-specific type containing two `int64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t);
+/// ARM-specific type containing three `int64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t);
+/// ARM-specific type containing four `int64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_t);
+
+/// ARM-specific type containing two `uint64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t);
+/// ARM-specific type containing three `uint64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x1x3_t(pub uint64x1_t, pub uint64x1_t, pub uint64x1_t);
+/// ARM-specific type containing four `uint64x1_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x1x4_t(
+    pub uint64x1_t,
+    pub uint64x1_t,
+    pub uint64x1_t,
+    pub uint64x1_t,
+);
+
+/// ARM-specific type containing two `uint64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t);
+/// ARM-specific type containing three `uint64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x2x3_t(pub uint64x2_t, pub uint64x2_t, pub uint64x2_t);
+/// ARM-specific type containing four `uint64x2_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint64x2x4_t(
+    pub uint64x2_t,
+    pub uint64x2_t,
+    pub uint64x2_t,
+    pub uint64x2_t,
+);
+
 #[allow(improper_ctypes)]
 extern "C" {
     // absolute value (64-bit)
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 5850be482b..88798c0c75 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -2033,6 +2033,81 @@ aarch64 = sqadd
 link-aarch64 = sqadd._EXT_
 generate i32, i64
 
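+// Each field below drives stdarch-gen: `a` is the buffer the generated test
+// loads from, `validate` lists the lanes the test expects back, `test =
+// load_test` routes test generation through gen_load_test, and every
+// `generate` line pairs a pointer argument type with the aggregate it returns.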
+/// Load multiple single-element structures to one, two, three, or four registers
+name = vld1
+out-suffix
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
+test = load_test
+
+aarch64 = ld1
+link-aarch64 = ld1x2._EXT2_
+arm = vld1
+link-arm = vld1x2._EXT2_
+generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
+generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
+
+link-aarch64 = ld1x3._EXT2_
+link-arm = ld1x3._EXT2_
+generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
+generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
+
+link-aarch64 = ld1x4._EXT2_
+link-arm = vldx4._EXT2_
+generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
+generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
+
+/// Load multiple single-element structures to one, two, three, or four registers
+name = vld1
+out-suffix
+multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
+test = load_test
+
+aarch64 = ld1
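+// As an illustrative sketch (not actual generator output), the multi_fn above
+// turns each unsigned or poly intrinsic into a transmute wrapper around the
+// signed implementation from the previous block, e.g. for the u8 x2 case:
+//   pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
+//       transmute(vld1_s8_x2(transmute(a)))
+//   }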
+arm = vld1 +generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t +generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t +generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t +generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t +generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t +generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t +generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t +generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t +generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t +generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t + +/// Load multiple single-element structures to one, two, three, or four registers +name = vld1 +out-suffix +a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. +validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. +test = load_test + +aarch64 = ld1 +link-aarch64 = ld1x2._EXT2_ +generate *const f64:float64x1x2_t, *const f64:float64x2x2_t + +link-aarch64 = ld1x3._EXT2_ +generate *const f64:float64x1x3_t, *const f64:float64x2x3_t + +link-aarch64 = ld1x4._EXT2_ +generate *const f64:float64x1x4_t, *const f64:float64x2x4_t + +arm = vld1 +link-aarch64 = ld1x2._EXT2_ +link-arm = vld1x2._EXT2_ +generate *const f32:float32x2x2_t, *const f32:float32x4x2_t + +link-aarch64 = ld1x3._EXT2_ +link-arm = vld1x3._EXT2_ +generate *const f32:float32x2x3_t, *const f32:float32x4x3_t + +link-aarch64 = ld1x4._EXT2_ +link-arm = vld1x4._EXT2_ +generate *const f32:float32x2x4_t, *const f32:float32x4x4_t + /// Multiply name = vmul a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 7f506acb77..be8c8ba6bb 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -80,12 +80,116 @@ fn type_len(t: &str) -> usize { "poly16x8_t" => 8, "poly64x1_t" => 1, "poly64x2_t" => 2, + "int8x8x2_t" => 16, + "int8x8x3_t" => 24, + "int8x8x4_t" => 32, + "int16x4x2_t" => 8, + "int16x4x3_t" => 12, + "int16x4x4_t" => 16, + "int32x2x2_t" => 4, + "int32x2x3_t" => 6, + "int32x2x4_t" => 8, + "int64x1x2_t" => 2, + "int64x1x3_t" => 3, + "int64x1x4_t" => 4, + "uint8x8x2_t" => 16, + "uint8x8x3_t" => 24, + "uint8x8x4_t" => 32, + "uint16x4x2_t" => 8, + "uint16x4x3_t" => 12, + "uint16x4x4_t" => 16, + "uint32x2x2_t" => 4, + "uint32x2x3_t" => 6, + "uint32x2x4_t" => 8, + "uint64x1x2_t" => 2, + "uint64x1x3_t" => 3, + "uint64x1x4_t" => 4, + "poly8x8x2_t" => 16, + "poly8x8x3_t" => 24, + "poly8x8x4_t" => 32, + "poly16x4x2_t" => 8, + "poly16x4x3_t" => 12, + "poly16x4x4_t" => 16, + "poly64x1x2_t" => 2, + "poly64x1x3_t" => 3, + "poly64x1x4_t" => 4, + "float32x2x2_t" => 4, + "float32x2x3_t" => 6, + "float32x2x4_t" => 8, + "float64x1x2_t" => 2, + "float64x1x3_t" => 3, + "float64x1x4_t" => 4, + "int8x16x2_t" => 32, + "int8x16x3_t" => 48, + "int8x16x4_t" => 64, + "int16x8x2_t" => 16, + "int16x8x3_t" => 24, + "int16x8x4_t" => 32, + "int32x4x2_t" => 8, + "int32x4x3_t" => 12, + "int32x4x4_t" => 16, + "int64x2x2_t" => 4, + "int64x2x3_t" => 6, + "int64x2x4_t" => 8, + "uint8x16x2_t" => 32, + "uint8x16x3_t" => 48, + "uint8x16x4_t" => 64, + 
"uint16x8x2_t" => 16, + "uint16x8x3_t" => 24, + "uint16x8x4_t" => 32, + "uint32x4x2_t" => 8, + "uint32x4x3_t" => 12, + "uint32x4x4_t" => 16, + "uint64x2x2_t" => 4, + "uint64x2x3_t" => 6, + "uint64x2x4_t" => 8, + "poly8x16x2_t" => 32, + "poly8x16x3_t" => 48, + "poly8x16x4_t" => 64, + "poly16x8x2_t" => 16, + "poly16x8x3_t" => 24, + "poly16x8x4_t" => 32, + "poly64x2x2_t" => 4, + "poly64x2x3_t" => 6, + "poly64x2x4_t" => 8, + "float32x4x2_t" => 8, + "float32x4x3_t" => 12, + "float32x4x4_t" => 16, + "float64x2x2_t" => 4, + "float64x2x3_t" => 6, + "float64x2x4_t" => 8, "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8" | "p16" | "p64" | "p128" => 1, _ => panic!("unknown type: {}", t), } } +fn type_sub_len(t: &str) -> usize { + match t { + "int8x8_t" | "int16x4_t" | "int32x2_t" | "int64x1_t" | "uint8x8_t" | "uint16x4_t" + | "uint32x2_t" | "uint64x1_t" => 1, + "int8x8x2_t" | "int16x4x2_t" | "int32x2x2_t" | "int64x1x2_t" | "uint8x8x2_t" + | "uint16x4x2_t" | "uint32x2x2_t" | "uint64x1x2_t" | "poly8x8x2_t" | "poly16x4x2_t" + | "poly64x1x2_t" | "float32x2x2_t" | "float64x1x2_t" | "int8x16x2_t" | "int16x8x2_t" + | "int32x4x2_t" | "int64x2x2_t" | "uint8x16x2_t" | "uint16x8x2_t" | "uint32x4x2_t" + | "uint64x2x2_t" | "poly8x16x2_t" | "poly16x8x2_t" | "poly64x2x2_t" | "float32x4x2_t" + | "float64x2x2_t" => 2, + "int8x8x3_t" | "int16x4x3_t" | "int32x2x3_t" | "int64x1x3_t" | "uint8x8x3_t" + | "uint16x4x3_t" | "uint32x2x3_t" | "uint64x1x3_t" | "poly8x8x3_t" | "poly16x4x3_t" + | "poly64x1x3_t" | "float32x2x3_t" | "float64x1x3_t" | "int8x16x3_t" | "int16x8x3_t" + | "int32x4x3_t" | "int64x2x3_t" | "uint8x16x3_t" | "uint16x8x3_t" | "uint32x4x3_t" + | "uint64x2x3_t" | "poly8x16x3_t" | "poly16x8x3_t" | "poly64x2x3_t" | "float32x4x3_t" + | "float64x2x3_t" => 3, + "int8x8x4_t" | "int16x4x4_t" | "int32x2x4_t" | "int64x1x4_t" | "uint8x8x4_t" + | "uint16x4x4_t" | "uint32x2x4_t" | "uint64x1x4_t" | "poly8x8x4_t" | "poly16x4x4_t" + | "poly64x1x4_t" | "float32x2x4_t" | "float64x1x4_t" | "int8x16x4_t" | "int16x8x4_t" + | "int32x4x4_t" | "int64x2x4_t" | "uint8x16x4_t" | "uint16x8x4_t" | "uint32x4x4_t" + | "uint64x2x4_t" | "poly8x16x4_t" | "poly16x8x4_t" | "poly64x2x4_t" | "float32x4x4_t" + | "float64x2x4_t" => 4, + _ => panic!("unknown type: {}", t), + } +} + fn type_bits(t: &str) -> usize { match t { "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" @@ -177,6 +281,84 @@ fn type_to_suffix(t: &str) -> &str { "poly16x8_t" => "q_p16", "poly64x1_t" => "_p64", "poly64x2_t" => "q_p64", + "int8x8x2_t" => "_s8_x2", + "int8x8x3_t" => "_s8_x3", + "int8x8x4_t" => "_s8_x4", + "int16x4x2_t" => "_s16_x2", + "int16x4x3_t" => "_s16_x3", + "int16x4x4_t" => "_s16_x4", + "int32x2x2_t" => "_s32_x2", + "int32x2x3_t" => "_s32_x3", + "int32x2x4_t" => "_s32_x4", + "int64x1x2_t" => "_s64_x2", + "int64x1x3_t" => "_s64_x3", + "int64x1x4_t" => "_s64_x4", + "uint8x8x2_t" => "_u8_x2", + "uint8x8x3_t" => "_u8_x3", + "uint8x8x4_t" => "_u8_x4", + "uint16x4x2_t" => "_u16_x2", + "uint16x4x3_t" => "_u16_x3", + "uint16x4x4_t" => "_u16_x4", + "uint32x2x2_t" => "_u32_x2", + "uint32x2x3_t" => "_u32_x3", + "uint32x2x4_t" => "_u32_x4", + "uint64x1x2_t" => "_u64_x2", + "uint64x1x3_t" => "_u64_x3", + "uint64x1x4_t" => "_u64_x4", + "poly8x8x2_t" => "_p8_x2", + "poly8x8x3_t" => "_p8_x3", + "poly8x8x4_t" => "_p8_x4", + "poly16x4x2_t" => "_p16_x2", + "poly16x4x3_t" => "_p16_x3", + "poly16x4x4_t" => "_p16_x4", + "poly64x1x2_t" => "_p64_x2", + "poly64x1x3_t" => "_p64_x3", + "poly64x1x4_t" => "_p64_x4", + 
"float32x2x2_t" => "_f32_x2", + "float32x2x3_t" => "_f32_x3", + "float32x2x4_t" => "_f32_x4", + "float64x1x2_t" => "_f64_x2", + "float64x1x3_t" => "_f64_x3", + "float64x1x4_t" => "_f64_x4", + "int8x16x2_t" => "q_s8_x2", + "int8x16x3_t" => "q_s8_x3", + "int8x16x4_t" => "q_s8_x4", + "int16x8x2_t" => "q_s16_x2", + "int16x8x3_t" => "q_s16_x3", + "int16x8x4_t" => "q_s16_x4", + "int32x4x2_t" => "q_s32_x2", + "int32x4x3_t" => "q_s32_x3", + "int32x4x4_t" => "q_s32_x4", + "int64x2x2_t" => "q_s64_x2", + "int64x2x3_t" => "q_s64_x3", + "int64x2x4_t" => "q_s64_x4", + "uint8x16x2_t" => "q_u8_x2", + "uint8x16x3_t" => "q_u8_x3", + "uint8x16x4_t" => "q_u8_x4", + "uint16x8x2_t" => "q_u16_x2", + "uint16x8x3_t" => "q_u16_x3", + "uint16x8x4_t" => "q_u16_x4", + "uint32x4x2_t" => "q_u32_x2", + "uint32x4x3_t" => "q_u32_x3", + "uint32x4x4_t" => "q_u32_x4", + "uint64x2x2_t" => "q_u64_x2", + "uint64x2x3_t" => "q_u64_x3", + "uint64x2x4_t" => "q_u64_x4", + "poly8x16x2_t" => "q_p8_x2", + "poly8x16x3_t" => "q_p8_x3", + "poly8x16x4_t" => "q_p8_x4", + "poly16x8x2_t" => "q_p16_x2", + "poly16x8x3_t" => "q_p16_x3", + "poly16x8x4_t" => "q_p16_x4", + "poly64x2x2_t" => "q_p64_x2", + "poly64x2x3_t" => "q_p64_x3", + "poly64x2x4_t" => "q_p64_x4", + "float32x4x2_t" => "q_f32_x2", + "float32x4x3_t" => "q_f32_x3", + "float32x4x4_t" => "q_f32_x4", + "float64x2x2_t" => "q_f64_x2", + "float64x2x3_t" => "q_f64_x3", + "float64x2x4_t" => "q_f64_x4", "i8" => "b_s8", "i16" => "h_s16", "i32" => "s_s32", @@ -274,18 +456,10 @@ fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String { str } -fn type_to_signed(t: &str) -> &str { - match t { - "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "int8x8_t", - "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "int8x16_t", - "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "int16x4_t", - "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "int16x8_t", - "int32x2_t" | "uint32x2_t" => "int32x2_t", - "int32x4_t" | "uint32x4_t" => "int32x4_t", - "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "int64x1_t", - "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "int64x2_t", - _ => panic!("unknown type: {}", t), - } +fn type_to_signed(t: &String) -> String { + let s = t.replace("uint", "int"); + let s = s.replace("poly", "int"); + s } fn type_to_unsigned(t: &str) -> &str { @@ -384,34 +558,34 @@ enum TargetFeature { fn type_to_global_type(t: &str) -> &str { match t { - "int8x8_t" => "i8x8", - "int8x16_t" => "i8x16", - "int16x4_t" => "i16x4", - "int16x8_t" => "i16x8", - "int32x2_t" => "i32x2", - "int32x4_t" => "i32x4", - "int64x1_t" => "i64x1", - "int64x2_t" => "i64x2", - "uint8x8_t" => "u8x8", - "uint8x16_t" => "u8x16", - "uint16x4_t" => "u16x4", - "uint16x8_t" => "u16x8", - "uint32x2_t" => "u32x2", - "uint32x4_t" => "u32x4", - "uint64x1_t" => "u64x1", - "uint64x2_t" => "u64x2", + "int8x8_t" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "i8x8", + "int8x16_t" | "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "i8x16", + "int16x4_t" | "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" => "i16x4", + "int16x8_t" | "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "i16x8", + "int32x2_t" | "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" => "i32x2", + "int32x4_t" | "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "i32x4", + "int64x1_t" | "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" => "i64x1", + "int64x2_t" | "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "i64x2", + "uint8x8_t" | "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" => "u8x8", + "uint8x16_t" | "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "u8x16", + "uint16x4_t" | 
"uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" => "u16x4", + "uint16x8_t" | "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "u16x8", + "uint32x2_t" | "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" => "u32x2", + "uint32x4_t" | "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "u32x4", + "uint64x1_t" | "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" => "u64x1", + "uint64x2_t" | "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "u64x2", "float16x4_t" => "f16x4", "float16x8_t" => "f16x8", - "float32x2_t" => "f32x2", - "float32x4_t" => "f32x4", - "float64x1_t" => "f64", - "float64x2_t" => "f64x2", - "poly8x8_t" => "i8x8", - "poly8x16_t" => "i8x16", - "poly16x4_t" => "i16x4", - "poly16x8_t" => "i16x8", - "poly64x1_t" => "i64x1", - "poly64x2_t" => "i64x2", + "float32x2_t" | "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" => "f32x2", + "float32x4_t" | "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "f32x4", + "float64x1_t" | "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" => "f64", + "float64x2_t" | "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "f64x2", + "poly8x8_t" | "poly8x8x2_t" | "poly8x8x3_t" | "poly8x8x4_t" => "i8x8", + "poly8x16_t" | "poly8x16x2_t" | "poly8x16x3_t" | "poly8x16x4_t" => "i8x16", + "poly16x4_t" | "poly16x4x2_t" | "poly16x4x3_t" | "poly16x4x4_t" => "i16x4", + "poly16x8_t" | "poly16x8x2_t" | "poly16x8x3_t" | "poly16x8x4_t" => "i16x8", + "poly64x1_t" | "poly64x1x2_t" | "poly64x1x3_t" | "poly64x1x4_t" => "i64x1", + "poly64x2_t" | "poly64x2x2_t" | "poly64x2x3_t" | "poly64x2x4_t" => "i64x2", "i8" => "i8", "i16" => "i16", "i32" => "i32", @@ -432,18 +606,33 @@ fn type_to_global_type(t: &str) -> &str { fn type_to_native_type(t: &str) -> &str { match t { - "int8x8_t" | "int8x16_t" | "i8" => "i8", - "int16x4_t" | "int16x8_t" | "i16" => "i16", - "int32x2_t" | "int32x4_t" | "i32" => "i32", - "int64x1_t" | "int64x2_t" | "i64" => "i64", - "uint8x8_t" | "uint8x16_t" | "u8" => "u8", - "uint16x4_t" | "uint16x8_t" | "u16" => "u16", - "uint32x2_t" | "uint32x4_t" | "u32" => "u32", - "uint64x1_t" | "uint64x2_t" | "u64" => "u64", + "int8x8_t" | "int8x16_t" | "i8" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" + | "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "i8", + "int16x4_t" | "int16x8_t" | "i16" | "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" + | "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "i16", + "int32x2_t" | "int32x4_t" | "i32" | "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" + | "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "i32", + "int64x1_t" | "int64x2_t" | "i64" | "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" + | "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "i64", + "uint8x8_t" | "uint8x16_t" | "u8" | "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" + | "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "u8", + "uint16x4_t" | "uint16x8_t" | "u16" | "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" + | "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "u16", + "uint32x2_t" | "uint32x4_t" | "u32" | "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" + | "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "u32", + "uint64x1_t" | "uint64x2_t" | "u64" | "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" + | "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "u64", "float16x4_t" | "float16x8_t" => "f16", - "float32x2_t" | "float32x4_t" => "f32", - "float64x1_t" | "float64x2_t" => "f64", - "poly64x1_t" | "poly64x2_t" => "u64", + "float32x2_t" | "float32x4_t" | "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" + | "float32x4x2_t" | "float32x4x3_t" 
| "float32x4x4_t" => "f32", + "float64x1_t" | "float64x2_t" | "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" + | "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "f64", + "poly8x8_t" | "poly8x16_t" | "poly8x8x2_t" | "poly8x8x3_t" | "poly8x8x4_t" + | "poly8x16x2_t" | "poly8x16x3_t" | "poly8x16x4_t" => "u8", + "poly16x4_t" | "poly16x8_t" | "poly16x4x2_t" | "poly16x4x3_t" | "poly16x4x4_t" + | "poly16x8x2_t" | "poly16x8x3_t" | "poly16x8x4_t" => "u16", + "poly64x1_t" | "poly64x2_t" | "poly64x1x2_t" | "poly64x1x3_t" | "poly64x1x4_t" + | "poly64x2x2_t" | "poly64x2x3_t" | "poly64x2x4_t" => "u64", _ => panic!("unknown type: {}", t), } } @@ -510,6 +699,26 @@ fn type_to_ext(t: &str) -> &str { "poly8x16_t" => "v16i8", "poly16x4_t" => "v4i16", "poly16x8_t" => "v8i16", + "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "v8i8.p0i8", + "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" => "v4i16.p0i16", + "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" => "v2i32.p0i32", + "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" => "v1i64.p0i64", + "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" => "v8i8.p0i8", + "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" => "v4i16.p0i16", + "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" => "v2i32.p0i32", + "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" => "v1i64.p0i64", + "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" => "v2f32.p0f32", + "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" => "v1f64.p0f64", + "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "v16i8.p0i8", + "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "v8i16.p0i16", + "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "v4i32.p0i32", + "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "v2i64.p0i64", + "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "v16i8.p0i8", + "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "v8i16.p0i16", + "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "v4i32.p0i32", + "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "v2i64.p0i64", + "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "v4f32.p0f32", + "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "v2f64.p0f64", "i8" => "i8", "i16" => "i16", "i32" => "i32", @@ -522,6 +731,16 @@ fn type_to_ext(t: &str) -> &str { "f64" => "f64", "p64" => "p64", "p128" => "p128", + "*const i8" => "i8", + "*const i16" => "i16", + "*const i32" => "i32", + "*const i64" => "i64", + "*const u8" => "i8", + "*const u16" => "i16", + "*const u32" => "i32", + "*const u64" => "i64", + "*const f32" => "f32", + "*const f64" => "f64", /* "poly64x1_t" => "i64x1", "poly64x2_t" => "i64x2", @@ -858,9 +1077,8 @@ fn gen_aarch64( target: TargetFeature, fixed: &Vec, multi_fn: &Vec, + test_fn: &str, ) -> (String, String) { - let _global_t = type_to_global_type(in_t[0]); - let _global_ret_t = type_to_global_type(out_t); let name = match suffix { Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])), NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])), @@ -939,13 +1157,19 @@ fn gen_aarch64( link.push_str(&link_aarch64); link.replace("_EXT_", ext).replace("_EXT2_", ext2) }; + let abi = if test_fn != "normal" { + "unadjusted" + } else { + "C" + }; ext_c = format!( r#"#[allow(improper_ctypes)] - extern "C" {{ + extern "{}" {{ #[cfg_attr(target_arch = "aarch64", link_name = "{}")] fn {}({}) -> {}; }} "#, + abi, link_aarch64, current_fn, match para_num { @@ -1162,17 +1386,94 @@ fn gen_aarch64( current_comment, current_target, current_aarch64, const_assert, const_legacy, call ); - let test = gen_test( - &name, - in_t, - &out_t, - 
         "i8" => "i8",
         "i16" => "i16",
         "i32" => "i32",
@@ -522,6 +731,16 @@ fn type_to_ext(t: &str) -> &str {
         "f64" => "f64",
         "p64" => "p64",
         "p128" => "p128",
+        "*const i8" => "i8",
+        "*const i16" => "i16",
+        "*const i32" => "i32",
+        "*const i64" => "i64",
+        "*const u8" => "i8",
+        "*const u16" => "i16",
+        "*const u32" => "i32",
+        "*const u64" => "i64",
+        "*const f32" => "f32",
+        "*const f64" => "f64",
         /*
         "poly64x1_t" => "i64x1",
         "poly64x2_t" => "i64x2",
@@ -858,9 +1077,8 @@ fn gen_aarch64(
     target: TargetFeature,
     fixed: &Vec<String>,
     multi_fn: &Vec<String>,
+    test_fn: &str,
 ) -> (String, String) {
-    let _global_t = type_to_global_type(in_t[0]);
-    let _global_ret_t = type_to_global_type(out_t);
     let name = match suffix {
         Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])),
         NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])),
@@ -939,13 +1157,19 @@ fn gen_aarch64(
         link.push_str(&link_aarch64);
         link.replace("_EXT_", ext).replace("_EXT2_", ext2)
     };
+    let abi = if test_fn != "normal" {
+        "unadjusted"
+    } else {
+        "C"
+    };
     ext_c = format!(
         r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "{}" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
 "#,
+        abi,
         link_aarch64,
         current_fn,
         match para_num {
@@ -1162,17 +1386,94 @@ fn gen_aarch64(
         current_comment, current_target, current_aarch64, const_assert, const_legacy, call
     );
-    let test = gen_test(
-        &name,
-        in_t,
-        &out_t,
-        current_tests,
-        [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
-        type_len(out_t),
-        para_num,
-    );
+    let test = if test_fn == "load_test" {
+        gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t))
+    } else {
+        gen_test(
+            &name,
+            in_t,
+            &out_t,
+            current_tests,
+            [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
+            type_len(out_t),
+            para_num,
+        )
+    };
     (function, test)
 }
 
+fn gen_load_test(
+    name: &str,
+    _in_t: &[&str; 3],
+    out_t: &str,
+    current_tests: &[(
+        Vec<String>,
+        Vec<String>,
+        Vec<String>,
+        Option<String>,
+        Vec<String>,
+    )],
+    len_out: usize,
+) -> String {
+    let mut test = format!(
+        r#"
+    #[simd_test(enable = "neon")]
+    unsafe fn test_{}() {{"#,
+        name,
+    );
+    for (a, _, _, _, e) in current_tests {
+        let a: Vec<String> = a.iter().take(len_out + 1).cloned().collect();
+        let e: Vec<String> = e.iter().take(len_out).cloned().collect();
+        let mut input = String::from("[");
+        for i in 0..type_len(out_t) + 1 {
+            if i != 0 {
+                input.push_str(", ");
+            }
+            input.push_str(&a[i])
+        }
+        input.push_str("]");
+        let mut output = String::from("[");
+        for i in 0..type_sub_len(out_t) {
+            if i != 0 {
+                output.push_str(", ");
+            }
+            let sub_len = type_len(out_t) / type_sub_len(out_t);
+            if type_to_global_type(out_t) != "f64" {
+                let mut sub_output = format!("{}::new(", type_to_global_type(out_t));
+                for j in 0..sub_len {
+                    if j != 0 {
+                        sub_output.push_str(", ");
+                    }
+                    sub_output.push_str(&e[i * sub_len + j]);
+                }
+                sub_output.push_str(")");
+                output.push_str(&sub_output);
+            } else {
+                output.push_str(&e[i]);
+            }
+        }
+        output.push_str("]");
+        let t = format!(
+            r#"
+        let a: [{}; {}] = {};
+        let e: [{}; {}] = {};
+        let r: [{}; {}] = transmute({}(a[1..].as_ptr()));
+        assert_eq!(r, e);
+"#,
+            type_to_native_type(out_t),
+            type_len(out_t) + 1,
+            input,
+            type_to_global_type(out_t),
+            type_sub_len(out_t),
+            output,
+            type_to_global_type(out_t),
+            type_sub_len(out_t),
+            name,
+        );
+        test.push_str(&t);
+    }
+    test.push_str("    }\n");
+    test
+}
 
 fn gen_test(
     name: &str,
@@ -1305,9 +1606,8 @@ fn gen_arm(
     target: TargetFeature,
     fixed: &Vec<String>,
     multi_fn: &Vec<String>,
+    test_fn: &str,
 ) -> (String, String) {
-    let _global_t = type_to_global_type(in_t[0]);
-    let _global_ret_t = type_to_global_type(out_t);
     let name = match suffix {
         Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])),
         NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])),
@@ -1437,15 +1737,21 @@ fn gen_arm(
         link.push_str(&link_aarch64);
         link.replace("_EXT_", ext).replace("_EXT2_", ext2)
     };
+    let abi = if test_fn == "load_test" {
+        "unadjusted"
+    } else {
+        "C"
+    };
     if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
         ext_c = format!(
             r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "{}" {{
         #[cfg_attr(target_arch = "arm", link_name = "{}")]
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
 "#,
+            abi,
             link_arm,
             link_aarch64,
             current_fn,
@@ -1880,15 +2186,19 @@ fn gen_arm(
             call,
         )
     };
-    let test = gen_test(
-        &name,
-        in_t,
-        &out_t,
-        current_tests,
-        [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
-        type_len(out_t),
-        para_num,
-    );
+    let test = if test_fn == "load_test" {
+        gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t))
+    } else {
+        gen_test(
+            &name,
+            in_t,
+            &out_t,
+            current_tests,
+            [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
+            type_len(out_t),
+            para_num,
+        )
+    };
     (function, test)
 }
 
@@ -2305,7 +2615,9 @@ fn get_call(
         } else if fn_format[1] == "in2lane" {
             fn_name.push_str(&type_to_lane_suffixes(out_t, in_t[2]));
         } else if fn_format[1] == "signed" {
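+            // The new "outsigned" arm below keys off out_t rather than
+            // in_t[1]: loads such as vld1_u8_x2 take only a pointer input, so
+            // the wrapped signed intrinsic (vld1_s8_x2) can only be named
+            // from the output type.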
-            fn_name.push_str(type_to_suffix(type_to_signed(in_t[1])));
+            fn_name.push_str(type_to_suffix(&type_to_signed(&String::from(in_t[1]))));
+        } else if fn_format[1] == "outsigned" {
+            fn_name.push_str(type_to_suffix(&type_to_signed(&String::from(out_t))));
         } else if fn_format[1] == "unsigned" {
             fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1])));
         } else if fn_format[1] == "doubleself" {
@@ -2315,7 +2627,7 @@ fn get_call(
         } else if fn_format[1] == "noqself" {
             fn_name.push_str(type_to_noq_suffix(in_t[1]));
         } else if fn_format[1] == "noqsigned" {
-            fn_name.push_str(type_to_noq_suffix(type_to_signed(in_t[1])));
+            fn_name.push_str(type_to_noq_suffix(&type_to_signed(&String::from(in_t[1]))));
         } else if fn_format[1] == "nosuffix" {
         } else if fn_format[1] == "in_len" {
             fn_name.push_str(&type_len(in_t[1]).to_string());
@@ -2330,7 +2642,7 @@ fn get_call(
         } else if fn_format[1] == "nin0" {
             fn_name.push_str(type_to_n_suffix(in_t[0]));
        } else if fn_format[1] == "nsigned" {
-            fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1])));
+            fn_name.push_str(type_to_n_suffix(&type_to_signed(&String::from(in_t[1]))));
        } else if fn_format[1] == "in_ntt" {
            fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1])));
        } else if fn_format[1] == "out_ntt" {
@@ -2410,6 +2722,7 @@ fn main() -> io::Result<()> {
    )> = Vec::new();
    let mut multi_fn: Vec<String> = Vec::new();
    let mut target: TargetFeature = Default;
+    let mut test_fn = "normal";
 
    //
    // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
@@ -2491,6 +2804,7 @@ mod test {
            n = None;
            multi_fn = Vec::new();
            target = Default;
+            test_fn = "normal";
        } else if line.starts_with("//") {
        } else if line.starts_with("name = ") {
            current_name = Some(String::from(&line[7..]));
@@ -2547,6 +2861,14 @@ mod test {
            link_arm = Some(String::from(&line[11..]));
        } else if line.starts_with("const-arm = ") {
            const_arm = Some(String::from(&line[12..]));
+        } else if line.starts_with("test = ") {
+            test_fn = if line.contains("load_test") {
+                "load_test"
+            } else if line.contains("store_test") {
+                "store_test"
+            } else {
+                "normal"
+            }
        } else if line.starts_with("target = ") {
            target = match Some(String::from(&line[9..])) {
                Some(input) => match input.as_str() {
@@ -2618,6 +2940,7 @@ mod test {
                    target,
                    &fixed,
                    &multi_fn,
+                    test_fn,
                );
                out_arm.push_str(&function);
                tests_arm.push_str(&test);
@@ -2638,6 +2961,7 @@ mod test {
                    target,
                    &fixed,
                    &multi_fn,
+                    test_fn,
                );
                out_aarch64.push_str(&function);
                tests_aarch64.push_str(&test);
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index e85f0489a8..5836949122 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -218,11 +218,29 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
        "int8x16_t" => quote! { &I8X16 },
        "int16x2_t" => quote! { &I16X2 },
        "int16x4_t" => quote! { &I16X4 },
+        "int16x4x2_t" => quote! { &I16X4X2 },
+        "int16x4x3_t" => quote! { &I16X4X3 },
+        "int16x4x4_t" => quote! { &I16X4X4 },
        "int16x8_t" => quote! { &I16X8 },
+        "int16x8x2_t" => quote! { &I16X8X2 },
+        "int16x8x3_t" => quote! { &I16X8X3 },
+        "int16x8x4_t" => quote! { &I16X8X4 },
        "int32x2_t" => quote! { &I32X2 },
+        "int32x2x2_t" => quote! { &I32X2X2 },
+        "int32x2x3_t" => quote! { &I32X2X3 },
+        "int32x2x4_t" => quote! { &I32X2X4 },
        "int32x4_t" => quote! { &I32X4 },
+        "int32x4x2_t" => quote! { &I32X4X2 },
+        "int32x4x3_t" => quote! { &I32X4X3 },
+        "int32x4x4_t" => quote! { &I32X4X4 },
        "int64x1_t" => quote! { &I64X1 },
+        "int64x1x2_t" => quote! { &I64X1X2 },
+        "int64x1x3_t" => quote! { &I64X1X3 },
+        "int64x1x4_t" => quote! { &I64X1X4 },
        "int64x2_t" => quote! { &I64X2 },
+        "int64x2x2_t" => quote! { &I64X2X2 },
+        "int64x2x3_t" => quote! { &I64X2X3 },
+        "int64x2x4_t" => quote! { &I64X2X4 },
        "uint8x8_t" => quote! { &U8X8 },
        "uint8x4_t" => quote! { &U8X4 },
        "uint8x8x2_t" => quote! { &U8X8X2 },
@@ -233,15 +251,45 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
        "uint8x8x4_t" => quote! { &U8X8X4 },
        "uint8x16_t" => quote! { &U8X16 },
        "uint16x4_t" => quote! { &U16X4 },
+        "uint16x4x2_t" => quote! { &U16X4X2 },
+        "uint16x4x3_t" => quote! { &U16X4X3 },
+        "uint16x4x4_t" => quote! { &U16X4X4 },
        "uint16x8_t" => quote! { &U16X8 },
+        "uint16x8x2_t" => quote! { &U16X8X2 },
+        "uint16x8x3_t" => quote! { &U16X8X3 },
+        "uint16x8x4_t" => quote! { &U16X8X4 },
        "uint32x2_t" => quote! { &U32X2 },
+        "uint32x2x2_t" => quote! { &U32X2X2 },
+        "uint32x2x3_t" => quote! { &U32X2X3 },
+        "uint32x2x4_t" => quote! { &U32X2X4 },
        "uint32x4_t" => quote! { &U32X4 },
+        "uint32x4x2_t" => quote! { &U32X4X2 },
+        "uint32x4x3_t" => quote! { &U32X4X3 },
+        "uint32x4x4_t" => quote! { &U32X4X4 },
        "uint64x1_t" => quote! { &U64X1 },
+        "uint64x1x2_t" => quote! { &U64X1X2 },
+        "uint64x1x3_t" => quote! { &U64X1X3 },
+        "uint64x1x4_t" => quote! { &U64X1X4 },
        "uint64x2_t" => quote! { &U64X2 },
+        "uint64x2x2_t" => quote! { &U64X2X2 },
+        "uint64x2x3_t" => quote! { &U64X2X3 },
+        "uint64x2x4_t" => quote! { &U64X2X4 },
        "float32x2_t" => quote! { &F32X2 },
+        "float32x2x2_t" => quote! { &F32X2X2 },
+        "float32x2x3_t" => quote! { &F32X2X3 },
+        "float32x2x4_t" => quote! { &F32X2X4 },
        "float32x4_t" => quote! { &F32X4 },
+        "float32x4x2_t" => quote! { &F32X4X2 },
+        "float32x4x3_t" => quote! { &F32X4X3 },
+        "float32x4x4_t" => quote! { &F32X4X4 },
        "float64x1_t" => quote! { &F64X1 },
+        "float64x1x2_t" => quote! { &F64X1X2 },
+        "float64x1x3_t" => quote! { &F64X1X3 },
+        "float64x1x4_t" => quote! { &F64X1X4 },
        "float64x2_t" => quote! { &F64X2 },
+        "float64x2x2_t" => quote! { &F64X2X2 },
+        "float64x2x3_t" => quote! { &F64X2X3 },
+        "float64x2x4_t" => quote! { &F64X2X4 },
        "poly8x8_t" => quote! { &POLY8X8 },
        "poly8x8x2_t" => quote! { &POLY8X8X2 },
        "poly8x8x3_t" => quote! { &POLY8X8X3 },
@@ -254,7 +302,13 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
        "poly64x2_t" => quote! { &POLY64X2 },
        "poly8x16_t" => quote! { &POLY8X16 },
        "poly16x4_t" => quote! { &POLY16X4 },
+        "poly16x4x2_t" => quote! { &POLY16X4X2 },
+        "poly16x4x3_t" => quote! { &POLY16X4X3 },
+        "poly16x4x4_t" => quote! { &POLY16X4X4 },
        "poly16x8_t" => quote! { &POLY16X8 },
+        "poly16x8x2_t" => quote! { &POLY16X8X2 },
+        "poly16x8x3_t" => quote! { &POLY16X8X3 },
+        "poly16x8x4_t" => quote! { &POLY16X8X4 },
        "p128" => quote! { &P128 },

        "v16i8" => quote!
{ &v16i8 }, From bf836e59f55830ef54b52fc60b04ba0695a184be Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Fri, 20 Aug 2021 18:06:11 +0800 Subject: [PATCH 2/7] correct llvm link name --- .../src/arm_shared/neon/generated.rs | 32 +++++++++---------- crates/stdarch-gen/neon.spec | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 28bf79bd87..dae5eb8630 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -5995,7 +5995,7 @@ vld1q_s64_x2_(a) pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v8i8.p0i8")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8")] fn vld1_s8_x3_(a: *const i8) -> int8x8x3_t; } @@ -6011,7 +6011,7 @@ vld1_s8_x3_(a) pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v4i16.p0i16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16")] fn vld1_s16_x3_(a: *const i16) -> int16x4x3_t; } @@ -6027,7 +6027,7 @@ vld1_s16_x3_(a) pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v2i32.p0i32")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32")] fn vld1_s32_x3_(a: *const i32) -> int32x2x3_t; } @@ -6043,7 +6043,7 @@ vld1_s32_x3_(a) pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v1i64.p0i64")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64")] fn vld1_s64_x3_(a: *const i64) -> int64x1x3_t; } @@ -6059,7 +6059,7 @@ vld1_s64_x3_(a) pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v16i8.p0i8")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8")] fn vld1q_s8_x3_(a: *const i8) -> int8x16x3_t; } @@ -6075,7 +6075,7 @@ vld1q_s8_x3_(a) pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v8i16.p0i16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16")] fn vld1q_s16_x3_(a: *const i16) -> int16x8x3_t; } @@ -6091,7 +6091,7 @@ vld1q_s16_x3_(a) pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v4i32.p0i32")] + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1x3.v4i32.p0i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32")] fn vld1q_s32_x3_(a: *const i32) -> int32x4x3_t; } @@ -6107,7 +6107,7 @@ vld1q_s32_x3_(a) pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ld1x3.v2i64.p0i64")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64")] fn vld1q_s64_x3_(a: *const i64) -> int64x2x3_t; } @@ -6123,7 +6123,7 @@ vld1q_s64_x3_(a) pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v8i8.p0i8")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8")] fn vld1_s8_x4_(a: *const i8) -> int8x8x4_t; } @@ -6139,7 +6139,7 @@ vld1_s8_x4_(a) pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v4i16.p0i16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16")] fn vld1_s16_x4_(a: *const i16) -> int16x4x4_t; } @@ -6155,7 +6155,7 @@ vld1_s16_x4_(a) pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v2i32.p0i32")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32")] fn vld1_s32_x4_(a: *const i32) -> int32x2x4_t; } @@ -6171,7 +6171,7 @@ vld1_s32_x4_(a) pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v1i64.p0i64")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64")] fn vld1_s64_x4_(a: *const i64) -> int64x1x4_t; } @@ -6187,7 +6187,7 @@ vld1_s64_x4_(a) pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v16i8.p0i8")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8")] fn vld1q_s8_x4_(a: *const i8) -> int8x16x4_t; } @@ -6203,7 +6203,7 @@ vld1q_s8_x4_(a) pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v8i16.p0i16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16")] fn vld1q_s16_x4_(a: *const i16) -> int16x8x4_t; } @@ -6219,7 +6219,7 @@ vld1q_s16_x4_(a) pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v4i32.p0i32")] + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32")]
         fn vld1q_s32_x4_(a: *const i32) -> int32x4x4_t;
     }
@@ -6235,7 +6235,7 @@ vld1q_s32_x4_(a)
 pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
     #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vldx4.v2i64.p0i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64")]
         fn vld1q_s64_x4_(a: *const i64) -> int64x2x4_t;
     }
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 88798c0c75..2e2e3cee40 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -2048,12 +2048,12 @@ generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *
 generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
 
 link-aarch64 = ld1x3._EXT2_
-link-arm = ld1x3._EXT2_
+link-arm = vld1x3._EXT2_
 generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
 generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
 
 link-aarch64 = ld1x4._EXT2_
-link-arm = vldx4._EXT2_
+link-arm = vld1x4._EXT2_
 generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
 generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
 

From 1ee274933631d5fee38d25544d64b4e70f3b57b8 Mon Sep 17 00:00:00 2001
From: SparrowLii
Date: Mon, 23 Aug 2021 14:49:29 +0800
Subject: [PATCH 3/7] correct ABI from "C" to "unadjusted"

---
 .../core_arch/src/aarch64/neon/generated.rs | 334 +++++-----
 .../src/arm_shared/neon/generated.rs        | 596 +++++++++---------
 crates/stdarch-gen/src/main.rs              | 182 +----
 3 files changed, 496 insertions(+), 616 deletions(-)

diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 886e47fcf5..97e794f2a1 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -15,7 +15,7 @@ use stdarch_test::assert_instr;
 #[cfg_attr(test, assert_instr(fabd))]
 pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v1f64")]
         fn vabd_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
@@ -28,7 +28,7 @@ pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fabd))]
 pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v2f64")]
         fn vabdq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -1084,7 +1084,7 @@ pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(facgt))]
 pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")]
         fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
     }
@@ -1097,7 +1097,7 @@ pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
 #[cfg_attr(test, 
assert_instr(facgt))] pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")] fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; } @@ -1110,7 +1110,7 @@ pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(facge))] pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")] fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t; } @@ -1123,7 +1123,7 @@ pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(facge))] pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")] fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; } @@ -2103,7 +2103,7 @@ pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { #[cfg_attr(test, assert_instr(fcvtxn))] pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")] fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t; } @@ -2126,7 +2126,7 @@ pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t pub unsafe fn vcvt_n_f64_s64(a: int64x1_t) -> float64x1_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64")] fn vcvt_n_f64_s64_(a: int64x1_t, n: i32) -> float64x1_t; } @@ -2141,7 +2141,7 @@ pub unsafe fn vcvt_n_f64_s64(a: int64x1_t) -> float64x1_t { pub unsafe fn vcvtq_n_f64_s64(a: int64x2_t) -> float64x2_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64")] fn vcvtq_n_f64_s64_(a: int64x2_t, n: i32) -> float64x2_t; } @@ -2156,7 +2156,7 @@ pub unsafe fn vcvtq_n_f64_s64(a: int64x2_t) -> float64x2_t { pub unsafe fn vcvts_n_f32_s32(a: i32) -> f32 { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32")] fn vcvts_n_f32_s32_(a: i32, n: i32) -> f32; } @@ -2171,7 +2171,7 @@ pub unsafe fn vcvts_n_f32_s32(a: i32) -> f32 { pub unsafe fn vcvtd_n_f64_s64(a: i64) -> f64 { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64")] fn vcvtd_n_f64_s64_(a: i64, n: i32) -> f64; } @@ -2186,7 +2186,7 @@ pub unsafe fn vcvtd_n_f64_s64(a: i64) -> f64 { pub unsafe fn vcvt_n_f64_u64(a: uint64x1_t) -> float64x1_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64")] fn vcvt_n_f64_u64_(a: uint64x1_t, n: i32) -> float64x1_t; } @@ 
-2201,7 +2201,7 @@ pub unsafe fn vcvt_n_f64_u64(a: uint64x1_t) -> float64x1_t { pub unsafe fn vcvtq_n_f64_u64(a: uint64x2_t) -> float64x2_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64")] fn vcvtq_n_f64_u64_(a: uint64x2_t, n: i32) -> float64x2_t; } @@ -2216,7 +2216,7 @@ pub unsafe fn vcvtq_n_f64_u64(a: uint64x2_t) -> float64x2_t { pub unsafe fn vcvts_n_f32_u32(a: u32) -> f32 { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32")] fn vcvts_n_f32_u32_(a: u32, n: i32) -> f32; } @@ -2231,7 +2231,7 @@ pub unsafe fn vcvts_n_f32_u32(a: u32) -> f32 { pub unsafe fn vcvtd_n_f64_u64(a: u64) -> f64 { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64")] fn vcvtd_n_f64_u64_(a: u64, n: i32) -> f64; } @@ -2246,7 +2246,7 @@ pub unsafe fn vcvtd_n_f64_u64(a: u64) -> f64 { pub unsafe fn vcvt_n_s64_f64(a: float64x1_t) -> int64x1_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64")] fn vcvt_n_s64_f64_(a: float64x1_t, n: i32) -> int64x1_t; } @@ -2261,7 +2261,7 @@ pub unsafe fn vcvt_n_s64_f64(a: float64x1_t) -> int64x1_t { pub unsafe fn vcvtq_n_s64_f64(a: float64x2_t) -> int64x2_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64")] fn vcvtq_n_s64_f64_(a: float64x2_t, n: i32) -> int64x2_t; } @@ -2276,7 +2276,7 @@ pub unsafe fn vcvtq_n_s64_f64(a: float64x2_t) -> int64x2_t { pub unsafe fn vcvts_n_s32_f32(a: f32) -> i32 { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32")] fn vcvts_n_s32_f32_(a: f32, n: i32) -> i32; } @@ -2291,7 +2291,7 @@ pub unsafe fn vcvts_n_s32_f32(a: f32) -> i32 { pub unsafe fn vcvtd_n_s64_f64(a: f64) -> i64 { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64")] fn vcvtd_n_s64_f64_(a: f64, n: i32) -> i64; } @@ -2306,7 +2306,7 @@ pub unsafe fn vcvtd_n_s64_f64(a: f64) -> i64 { pub unsafe fn vcvt_n_u64_f64(a: float64x1_t) -> uint64x1_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64")] fn vcvt_n_u64_f64_(a: float64x1_t, n: i32) -> uint64x1_t; } @@ -2321,7 +2321,7 @@ pub unsafe fn vcvt_n_u64_f64(a: float64x1_t) -> uint64x1_t { pub unsafe fn vcvtq_n_u64_f64(a: float64x2_t) -> uint64x2_t { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64")] fn vcvtq_n_u64_f64_(a: float64x2_t, n: i32) -> uint64x2_t; } @@ -2336,7 +2336,7 @@ pub unsafe fn 
vcvtq_n_u64_f64(a: float64x2_t) -> uint64x2_t { pub unsafe fn vcvts_n_u32_f32(a: f32) -> u32 { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32")] fn vcvts_n_u32_f32_(a: f32, n: i32) -> u32; } @@ -2351,7 +2351,7 @@ pub unsafe fn vcvts_n_u32_f32(a: f32) -> u32 { pub unsafe fn vcvtd_n_u64_f64(a: f64) -> u64 { static_assert!(N : i32 where N >= 1 && N <= 64); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64")] fn vcvtd_n_u64_f64_(a: f64, n: i32) -> u64; } @@ -2428,7 +2428,7 @@ pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcvtzs))] pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v1i64.v1f64")] fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t; } @@ -2441,7 +2441,7 @@ pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(fcvtzs))] pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i64.v2f64")] fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t; } @@ -2454,7 +2454,7 @@ pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(fcvtzu))] pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v1i64.v1f64")] fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t; } @@ -2467,7 +2467,7 @@ pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(fcvtzu))] pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i64.v2f64")] fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t; } @@ -2480,7 +2480,7 @@ pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32")] fn vcvta_s32_f32_(a: float32x2_t) -> int32x2_t; } @@ -2493,7 +2493,7 @@ pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32")] fn vcvtaq_s32_f32_(a: float32x4_t) -> int32x4_t; } @@ -2506,7 +2506,7 @@ pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")] fn vcvta_s64_f64_(a: float64x1_t) -> int64x1_t; } @@ -2519,7 +2519,7 @@ pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t { 
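// [editor's note, annotation, not part of the patch] Every hunk in this commit
// is the same one-line change: the private extern block declaring an LLVM
// intrinsic switches from the "C" ABI to rustc's internal "unadjusted" ABI.
// Under extern "C", rustc may adjust vector and aggregate parameters or return
// values to fit the platform's C calling convention, which need not match the
// signature the LLVM intrinsic expects; "unadjusted" passes the Rust-level
// types through untouched. The resulting pattern, taken from this diff:
//
//     #[allow(improper_ctypes)]
//     extern "unadjusted" {
//         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")]
//         fn vcvta_s64_f64_(a: float64x1_t) -> int64x1_t;
//     }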
#[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64")] fn vcvtaq_s64_f64_(a: float64x2_t) -> int64x2_t; } @@ -2532,7 +2532,7 @@ pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.i32.f32")] fn vcvtas_s32_f32_(a: f32) -> i32; } @@ -2545,7 +2545,7 @@ pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 { #[cfg_attr(test, assert_instr(fcvtas))] pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.i64.f64")] fn vcvtad_s64_f64_(a: f64) -> i64; } @@ -2558,7 +2558,7 @@ pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.i32.f32")] fn vcvtas_u32_f32_(a: f32) -> u32; } @@ -2571,7 +2571,7 @@ pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.i64.f64")] fn vcvtad_u64_f64_(a: f64) -> u64; } @@ -2584,7 +2584,7 @@ pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcvtns))] pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32")] fn vcvtn_s32_f32_(a: float32x2_t) -> int32x2_t; } @@ -2597,7 +2597,7 @@ pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(test, assert_instr(fcvtns))] pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32")] fn vcvtnq_s32_f32_(a: float32x4_t) -> int32x4_t; } @@ -2610,7 +2610,7 @@ pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(test, assert_instr(fcvtns))] pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64")] fn vcvtn_s64_f64_(a: float64x1_t) -> int64x1_t; } @@ -2623,7 +2623,7 @@ pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(fcvtns))] pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64")] fn vcvtnq_s64_f64_(a: float64x2_t) -> int64x2_t; } @@ -2636,7 +2636,7 @@ pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(fcvtns))] pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.i32.f32")] fn vcvtns_s32_f32_(a: f32) -> i32; } @@ -2649,7 +2649,7 @@ pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 { #[cfg_attr(test, 
assert_instr(fcvtns))] pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.i64.f64")] fn vcvtnd_s64_f64_(a: f64) -> i64; } @@ -2662,7 +2662,7 @@ pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32")] fn vcvtm_s32_f32_(a: float32x2_t) -> int32x2_t; } @@ -2675,7 +2675,7 @@ pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32")] fn vcvtmq_s32_f32_(a: float32x4_t) -> int32x4_t; } @@ -2688,7 +2688,7 @@ pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64")] fn vcvtm_s64_f64_(a: float64x1_t) -> int64x1_t; } @@ -2701,7 +2701,7 @@ pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64")] fn vcvtmq_s64_f64_(a: float64x2_t) -> int64x2_t; } @@ -2714,7 +2714,7 @@ pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.i32.f32")] fn vcvtms_s32_f32_(a: f32) -> i32; } @@ -2727,7 +2727,7 @@ pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 { #[cfg_attr(test, assert_instr(fcvtms))] pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.i64.f64")] fn vcvtmd_s64_f64_(a: f64) -> i64; } @@ -2740,7 +2740,7 @@ pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32")] fn vcvtp_s32_f32_(a: float32x2_t) -> int32x2_t; } @@ -2753,7 +2753,7 @@ pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32")] fn vcvtpq_s32_f32_(a: float32x4_t) -> int32x4_t; } @@ -2766,7 +2766,7 @@ pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.fcvtps.v1i64.v1f64")] fn vcvtp_s64_f64_(a: float64x1_t) -> int64x1_t; } @@ -2779,7 +2779,7 @@ pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64")] fn vcvtpq_s64_f64_(a: float64x2_t) -> int64x2_t; } @@ -2792,7 +2792,7 @@ pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.i32.f32")] fn vcvtps_s32_f32_(a: f32) -> i32; } @@ -2805,7 +2805,7 @@ pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 { #[cfg_attr(test, assert_instr(fcvtps))] pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.i64.f64")] fn vcvtpd_s64_f64_(a: f64) -> i64; } @@ -2818,7 +2818,7 @@ pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32")] fn vcvta_u32_f32_(a: float32x2_t) -> uint32x2_t; } @@ -2831,7 +2831,7 @@ pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32")] fn vcvtaq_u32_f32_(a: float32x4_t) -> uint32x4_t; } @@ -2844,7 +2844,7 @@ pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64")] fn vcvta_u64_f64_(a: float64x1_t) -> uint64x1_t; } @@ -2857,7 +2857,7 @@ pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(fcvtau))] pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64")] fn vcvtaq_u64_f64_(a: float64x2_t) -> uint64x2_t; } @@ -2870,7 +2870,7 @@ pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32")] fn vcvtn_u32_f32_(a: float32x2_t) -> uint32x2_t; } @@ -2883,7 +2883,7 @@ pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32")] fn vcvtnq_u32_f32_(a: float32x4_t) -> uint32x4_t; } @@ -2896,7 +2896,7 @@ pub unsafe fn vcvtnq_u32_f32(a: 
float32x4_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64")] fn vcvtn_u64_f64_(a: float64x1_t) -> uint64x1_t; } @@ -2909,7 +2909,7 @@ pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64")] fn vcvtnq_u64_f64_(a: float64x2_t) -> uint64x2_t; } @@ -2922,7 +2922,7 @@ pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.i32.f32")] fn vcvtns_u32_f32_(a: f32) -> u32; } @@ -2935,7 +2935,7 @@ pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcvtnu))] pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.i64.f64")] fn vcvtnd_u64_f64_(a: f64) -> u64; } @@ -2948,7 +2948,7 @@ pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32")] fn vcvtm_u32_f32_(a: float32x2_t) -> uint32x2_t; } @@ -2961,7 +2961,7 @@ pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32")] fn vcvtmq_u32_f32_(a: float32x4_t) -> uint32x4_t; } @@ -2974,7 +2974,7 @@ pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64")] fn vcvtm_u64_f64_(a: float64x1_t) -> uint64x1_t; } @@ -2987,7 +2987,7 @@ pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64")] fn vcvtmq_u64_f64_(a: float64x2_t) -> uint64x2_t; } @@ -3000,7 +3000,7 @@ pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.i32.f32")] fn vcvtms_u32_f32_(a: f32) -> u32; } @@ -3013,7 +3013,7 @@ pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcvtmu))] pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { 
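// [editor's note, annotation, not part of the patch] #[allow(improper_ctypes)]
// is kept on every block: the NEON vector types are not FFI-safe by the lint's
// rules, but these declarations bind LLVM intrinsics rather than C functions,
// so the lint is deliberately silenced. For orientation while reading this
// stretch of the diff, the fcvt* families differ only in rounding mode and
// signedness (s = signed result, u = unsigned result):
//
//     fcvtas / fcvtau   to nearest, ties away from zero
//     fcvtns / fcvtnu   to nearest, ties to even
//     fcvtms / fcvtmu   toward minus infinity
//     fcvtps / fcvtpu   toward plus infinity
//     fcvtzs / fcvtzu   toward zero (the plain vcvt_s64_f64 / vcvt_u64_f64 above)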
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.i64.f64")] fn vcvtmd_u64_f64_(a: f64) -> u64; } @@ -3026,7 +3026,7 @@ pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32")] fn vcvtp_u32_f32_(a: float32x2_t) -> uint32x2_t; } @@ -3039,7 +3039,7 @@ pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32")] fn vcvtpq_u32_f32_(a: float32x4_t) -> uint32x4_t; } @@ -3052,7 +3052,7 @@ pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64")] fn vcvtp_u64_f64_(a: float64x1_t) -> uint64x1_t; } @@ -3065,7 +3065,7 @@ pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64")] fn vcvtpq_u64_f64_(a: float64x2_t) -> uint64x2_t; } @@ -3078,7 +3078,7 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.i32.f32")] fn vcvtps_u32_f32_(a: f32) -> u32; } @@ -3091,7 +3091,7 @@ pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcvtpu))] pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.i64.f64")] fn vcvtpd_u64_f64_(a: f64) -> u64; } @@ -3914,7 +3914,7 @@ pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(sqneg))] pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v1i64")] fn vqneg_s64_(a: int64x1_t) -> int64x1_t; } @@ -3927,7 +3927,7 @@ pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t { #[cfg_attr(test, assert_instr(sqneg))] pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i64")] fn vqnegq_s64_(a: int64x2_t) -> int64x2_t; } @@ -3980,7 +3980,7 @@ pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 { #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")] fn vqsubs_u32_(a: u32, b: u32) -> u32; } @@ -3993,7 +3993,7 @@ pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn 
vqsubd_u64(a: u64, b: u64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")] fn vqsubd_u64_(a: u64, b: u64) -> u64; } @@ -4006,7 +4006,7 @@ pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(sqsub))] pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")] fn vqsubs_s32_(a: i32, b: i32) -> i32; } @@ -4019,7 +4019,7 @@ pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { #[cfg_attr(test, assert_instr(sqsub))] pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")] fn vqsubd_s64_(a: i64, b: i64) -> i64; } @@ -4032,7 +4032,7 @@ pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { #[cfg_attr(test, assert_instr(rbit))] pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v8i8")] fn vrbit_s8_(a: int8x8_t) -> int8x8_t; } @@ -4045,7 +4045,7 @@ pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t { #[cfg_attr(test, assert_instr(rbit))] pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v16i8")] fn vrbitq_s8_(a: int8x16_t) -> int8x16_t; } @@ -4090,7 +4090,7 @@ pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { #[cfg_attr(test, assert_instr(frintx))] pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f32")] fn vrndx_f32_(a: float32x2_t) -> float32x2_t; } @@ -4103,7 +4103,7 @@ pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(frintx))] pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v4f32")] fn vrndxq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4116,7 +4116,7 @@ pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frintx))] pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v1f64")] fn vrndx_f64_(a: float64x1_t) -> float64x1_t; } @@ -4129,7 +4129,7 @@ pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frintx))] pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f64")] fn vrndxq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4142,7 +4142,7 @@ pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frinta))] pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f32")] fn vrnda_f32_(a: float32x2_t) -> float32x2_t; } @@ -4155,7 +4155,7 @@ pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(frinta))] pub unsafe fn vrndaq_f32(a: 
float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v4f32")] fn vrndaq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4168,7 +4168,7 @@ pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frinta))] pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v1f64")] fn vrnda_f64_(a: float64x1_t) -> float64x1_t; } @@ -4181,7 +4181,7 @@ pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frinta))] pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f64")] fn vrndaq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4194,7 +4194,7 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frintn))] pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")] fn vrndn_f64_(a: float64x1_t) -> float64x1_t; } @@ -4207,7 +4207,7 @@ pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frintn))] pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")] fn vrndnq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4220,7 +4220,7 @@ pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frintm))] pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f32")] fn vrndm_f32_(a: float32x2_t) -> float32x2_t; } @@ -4233,7 +4233,7 @@ pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(frintm))] pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v4f32")] fn vrndmq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4246,7 +4246,7 @@ pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frintm))] pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v1f64")] fn vrndm_f64_(a: float64x1_t) -> float64x1_t; } @@ -4259,7 +4259,7 @@ pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frintm))] pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f64")] fn vrndmq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4272,7 +4272,7 @@ pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frintp))] pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f32")] fn vrndp_f32_(a: float32x2_t) -> float32x2_t; } @@ -4285,7 +4285,7 @@ pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t { 
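// [editor's note, annotation, not part of the patch] Unlike the fcvt*
// conversions above, most of the vrnd* rounding intrinsics in this stretch of
// the diff link to target-independent LLVM intrinsics, which LLVM lowers to
// the matching FRINT* instruction; only the "ties to even" variant goes
// through an aarch64-specific intrinsic. The mapping, as it appears here:
//
//     vrndx* (frintx) -> llvm.rint.*
//     vrnda* (frinta) -> llvm.round.*
//     vrndn* (frintn) -> llvm.aarch64.neon.frintn.*
//     vrndm* (frintm) -> llvm.floor.*
//     vrndp* (frintp) -> llvm.ceil.*
//     vrnd*  (frintz) -> llvm.trunc.*
//     vrndi* (frinti) -> llvm.nearbyint.*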
#[cfg_attr(test, assert_instr(frintp))] pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v4f32")] fn vrndpq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4298,7 +4298,7 @@ pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frintp))] pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v1f64")] fn vrndp_f64_(a: float64x1_t) -> float64x1_t; } @@ -4311,7 +4311,7 @@ pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frintp))] pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f64")] fn vrndpq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4324,7 +4324,7 @@ pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frintz))] pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f32")] fn vrnd_f32_(a: float32x2_t) -> float32x2_t; } @@ -4337,7 +4337,7 @@ pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(frintz))] pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v4f32")] fn vrndq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4350,7 +4350,7 @@ pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frintz))] pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v1f64")] fn vrnd_f64_(a: float64x1_t) -> float64x1_t; } @@ -4363,7 +4363,7 @@ pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frintz))] pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f64")] fn vrndq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4376,7 +4376,7 @@ pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(frinti))] pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f32")] fn vrndi_f32_(a: float32x2_t) -> float32x2_t; } @@ -4389,7 +4389,7 @@ pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(frinti))] pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v4f32")] fn vrndiq_f32_(a: float32x4_t) -> float32x4_t; } @@ -4402,7 +4402,7 @@ pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(frinti))] pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v1f64")] fn vrndi_f64_(a: float64x1_t) -> float64x1_t; } @@ -4415,7 +4415,7 @@ pub unsafe fn 
vrndi_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(frinti))] pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f64")] fn vrndiq_f64_(a: float64x2_t) -> float64x2_t; } @@ -4468,7 +4468,7 @@ pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")] fn vqadds_u32_(a: u32, b: u32) -> u32; } @@ -4481,7 +4481,7 @@ pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")] fn vqaddd_u64_(a: u64, b: u64) -> u64; } @@ -4494,7 +4494,7 @@ pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(sqadd))] pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")] fn vqadds_s32_(a: i32, b: i32) -> i32; } @@ -4507,7 +4507,7 @@ pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { #[cfg_attr(test, assert_instr(sqadd))] pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")] fn vqaddd_s64_(a: i64, b: i64) -> i64; } @@ -4774,7 +4774,7 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(pmull))] pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")] fn vmull_p64_(a: p64, b: p64) -> int8x16_t; } @@ -4917,7 +4917,7 @@ pub unsafe fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f32")] fn vmulx_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; } @@ -4930,7 +4930,7 @@ pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v4f32")] fn vmulxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; } @@ -4943,7 +4943,7 @@ pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v1f64")] fn vmulx_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; } @@ -4956,7 +4956,7 @@ pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { 
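// [editor's note, annotation, not part of the patch] The scalar intrinsics in
// this commit follow the usual AArch64 suffix convention: a trailing `s` names
// the single-lane f32/i32/u32 form and a trailing `d` the f64/i64/u64 form,
// e.g. vmulxs_f32 / vmulxd_f64 just below, or vqadds_u32 / vqaddd_u64 earlier
// in this diff. A small usage sketch (hypothetical values; needs an aarch64
// target with the neon feature enabled):
//
//     let x = vmulxd_f64(2.0, 0.5); // FMULX: an ordinary multiply, except 0 * inf gives +/-2.0
//     assert_eq!(x, 1.0);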
#[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f64")] fn vmulxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; } @@ -5049,7 +5049,7 @@ pub unsafe fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f32")] fn vmulxs_f32_(a: f32, b: f32) -> f32; } @@ -5062,7 +5062,7 @@ pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 { #[cfg_attr(test, assert_instr(fmulx))] pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f64")] fn vmulxd_f64_(a: f64, b: f64) -> f64; } @@ -5115,7 +5115,7 @@ pub unsafe fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64 { #[cfg_attr(test, assert_instr(fmadd))] pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v1f64")] fn vfma_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; } @@ -5128,7 +5128,7 @@ pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float6 #[cfg_attr(test, assert_instr(fmla))] pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f64")] fn vfmaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; } @@ -5238,7 +5238,7 @@ pub unsafe fn vfmaq_laneq_f64(a: float64x2_t, b: float64x2_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vfmas_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f32")] fn vfmas_lane_f32_(a: f32, b: f32, c: f32) -> f32; } @@ -5254,7 +5254,7 @@ pub unsafe fn vfmas_lane_f32(a: f32, b: f32, c: float32x2_t) -> #[rustc_legacy_const_generics(3)] pub unsafe fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f32")] fn vfmas_laneq_f32_(a: f32, b: f32, c: f32) -> f32; } @@ -5270,7 +5270,7 @@ pub unsafe fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) - #[rustc_legacy_const_generics(3)] pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f64")] fn vfmad_lane_f64_(a: f64, b: f64, c: f64) -> f64; } @@ -5286,7 +5286,7 @@ pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> #[rustc_legacy_const_generics(3)] pub unsafe fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f64")] fn vfmad_laneq_f64_(a: f64, b: f64, c: f64) -> f64; } @@ -5499,7 +5499,7 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(saddlv))] pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = 
"aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")] fn vaddlv_s16_(a: int16x4_t) -> i32; } @@ -5512,7 +5512,7 @@ pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 { #[cfg_attr(test, assert_instr(saddlv))] pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")] fn vaddlvq_s16_(a: int16x8_t) -> i32; } @@ -5525,7 +5525,7 @@ pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 { #[cfg_attr(test, assert_instr(saddlp))] pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")] fn vaddlv_s32_(a: int32x2_t) -> i64; } @@ -5538,7 +5538,7 @@ pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 { #[cfg_attr(test, assert_instr(saddlv))] pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v4i32")] fn vaddlvq_s32_(a: int32x4_t) -> i64; } @@ -5551,7 +5551,7 @@ pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 { #[cfg_attr(test, assert_instr(uaddlv))] pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16")] fn vaddlv_u16_(a: uint16x4_t) -> u32; } @@ -5564,7 +5564,7 @@ pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 { #[cfg_attr(test, assert_instr(uaddlv))] pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16")] fn vaddlvq_u16_(a: uint16x8_t) -> u32; } @@ -5577,7 +5577,7 @@ pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 { #[cfg_attr(test, assert_instr(uaddlp))] pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32")] fn vaddlv_u32_(a: uint32x2_t) -> u64; } @@ -5590,7 +5590,7 @@ pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 { #[cfg_attr(test, assert_instr(uaddlv))] pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32")] fn vaddlvq_u32_(a: uint32x4_t) -> u64; } @@ -5729,7 +5729,7 @@ pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fmax))] pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v1f64")] fn vmax_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; } @@ -5742,7 +5742,7 @@ pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { #[cfg_attr(test, assert_instr(fmax))] pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f64")] fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; } @@ -5755,7 +5755,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(fmaxnm))] pub unsafe fn 
vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
         fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
@@ -5768,7 +5768,7 @@ pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
         fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -5781,7 +5781,7 @@ pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
         fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
@@ -5794,7 +5794,7 @@ pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
         fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -5807,7 +5807,7 @@ pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
         fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
@@ -5820,7 +5820,7 @@ pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[cfg_attr(test, assert_instr(fmin))]
 pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v1f64")]
         fn vmin_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
@@ -5833,7 +5833,7 @@ pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fmin))]
 pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f64")]
         fn vminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -5846,7 +5846,7 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fminnm))]
 pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
         fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
@@ -5859,7 +5859,7 @@ pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fminnm))]
 pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
         fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -5872,7 +5872,7 @@ pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
         fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
@@ -5885,7 +5885,7 @@ pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
         fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
@@ -5898,7 +5898,7 @@ pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
         fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
@@ -5921,7 +5921,7 @@ pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
 #[cfg_attr(test, assert_instr(sqdmull))]
 pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulls.scalar")]
         fn vqdmulls_s32_(a: i32, b: i32) -> i64;
     }
@@ -6368,7 +6368,7 @@ pub unsafe fn vqmovns_u32(a: u32) -> u16 {
 #[cfg_attr(test, assert_instr(sqxtn))]
 pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
         fn vqmovnd_s64_(a: i64) -> i32;
     }
@@ -6381,7 +6381,7 @@ pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
 #[cfg_attr(test, assert_instr(uqxtn))]
 pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
         fn vqmovnd_u64_(a: u64) -> u32;
     }
@@ -6658,7 +6658,7 @@ pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t)
 #[cfg_attr(test, assert_instr(sqrshl))]
 pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
         fn vqrshls_s32_(a: i32, b: i32) -> i32;
     }
@@ -6671,7 +6671,7 @@ pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
 #[cfg_attr(test, assert_instr(sqrshl))]
 pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
         fn vqrshld_s64_(a: i64, b: i64) -> i64;
     }
@@ -6704,7 +6704,7 @@ pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
 #[cfg_attr(test, assert_instr(uqrshl))]
 pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
         fn vqrshls_u32_(a: u32, b: i32) -> u32;
     }
@@ -6717,7 +6717,7 @@ pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
 #[cfg_attr(test, assert_instr(uqrshl))]
 pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
         fn vqrshld_u64_(a: u64, b: i64) -> u64;
     }
@@ -6939,7 +6939,7 @@ pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) ->
 #[cfg_attr(test, assert_instr(sqshl))]
 pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")]
         fn vqshld_s64_(a: i64, b: i64) -> i64;
     }
@@ -6979,7 +6979,7 @@ pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
 #[cfg_attr(test, assert_instr(uqshl))]
 pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")]
         fn vqshld_u64_(a: u64, b: i64) -> u64;
     }
@@ -7101,7 +7101,7 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
 pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
         fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
     }
@@ -7166,7 +7166,7 @@ pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int
 pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
         fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
     }
@@ -7321,7 +7321,7 @@ pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(frsqrte))]
 pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v1f64")]
         fn vrsqrte_f64_(a: float64x1_t) -> float64x1_t;
     }
@@ -7334,7 +7334,7 @@ pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(frsqrte))]
 pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f64")]
         fn vrsqrteq_f64_(a: float64x2_t) -> float64x2_t;
     }
@@ -7347,7 +7347,7 @@ pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(frecpe))]
 pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v1f64")]
         fn vrecpe_f64_(a: float64x1_t) -> float64x1_t;
     }
@@ -7360,7 +7360,7 @@ pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(frecpe))]
 pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f64")]
         fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
     }
@@ -8109,7 +8109,7 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
 #[cfg_attr(test, assert_instr(srshl))]
 pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
         fn vrshld_s64_(a: i64, b: i64) -> i64;
     }
@@ -8122,7 +8122,7 @@ pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
 #[cfg_attr(test, assert_instr(urshl))]
 pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
         fn vrshld_u64_(a: u64, b: i64) -> u64;
     }
@@ -9524,7 +9524,7 @@ pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x
 #[cfg_attr(test, assert_instr(sqabs))]
 pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v1i64")]
         fn vqabs_s64_(a: int64x1_t) -> int64x1_t;
     }
@@ -9537,7 +9537,7 @@ pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t {
 #[cfg_attr(test, assert_instr(sqabs))]
 pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i64")]
         fn vqabsq_s64_(a: int64x2_t) -> int64x2_t;
     }
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index dae5eb8630..bdf8937d9d 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -497,7 +497,7 @@ pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v8i8")]
         fn vabd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -513,7 +513,7 @@ vabd_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v16i8")]
         fn vabdq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -529,7 +529,7 @@ vabdq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v4i16")]
         fn vabd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -545,7 +545,7 @@ vabd_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v8i16")]
         fn vabdq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -561,7 +561,7 @@ vabdq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v2i32")]
         fn vabd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -577,7 +577,7 @@ vabd_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))]
 pub unsafe fn vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v4i32")]
         fn vabdq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -593,7 +593,7 @@ vabdq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v8i8")]
         fn vabd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -609,7 +609,7 @@ vabd_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v16i8")]
         fn vabdq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -625,7 +625,7 @@ vabdq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v4i16")]
         fn vabd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -641,7 +641,7 @@ vabd_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v8i16")]
         fn vabdq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -657,7 +657,7 @@ vabdq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v2i32")]
         fn vabd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -673,7 +673,7 @@ vabd_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))]
 pub unsafe fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v4i32")]
         fn vabdq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -689,7 +689,7 @@ vabdq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabd))]
 pub unsafe fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v2f32")]
         fn vabd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
@@ -705,7 +705,7 @@ vabd_f32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabd))]
 pub unsafe fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v4f32")]
         fn vabdq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
@@ -1692,7 +1692,7 @@ pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i8")]
         fn vcls_s8_(a: int8x8_t) -> int8x8_t;
@@ -1708,7 +1708,7 @@ vcls_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v16i8")]
         fn vclsq_s8_(a: int8x16_t) -> int8x16_t;
@@ -1724,7 +1724,7 @@ vclsq_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i16")]
         fn vcls_s16_(a: int16x4_t) -> int16x4_t;
@@ -1740,7 +1740,7 @@ vcls_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i16")]
         fn vclsq_s16_(a: int16x8_t) -> int16x8_t;
@@ -1756,7 +1756,7 @@ vclsq_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v2i32")]
         fn vcls_s32_(a: int32x2_t) -> int32x2_t;
@@ -1772,7 +1772,7 @@ vcls_s32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
 pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i32")]
         fn vclsq_s32_(a: int32x4_t) -> int32x4_t;
@@ -1908,7 +1908,7 @@ pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
 pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32")]
         fn vcagt_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t;
@@ -1924,7 +1924,7 @@ vcagt_f32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
 pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32")]
         fn vcagtq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t;
@@ -1940,7 +1940,7 @@ vcagtq_f32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
 pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i32.v2f32")]
         fn vcage_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t;
@@ -1956,7 +1956,7 @@ vcage_f32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
 pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v4i32.v4f32")]
         fn vcageq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t;
@@ -2153,7 +2153,7 @@ pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t {
 pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32")]
         fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t;
     }
@@ -2169,7 +2169,7 @@ vcvt_n_f32_s32_(a, N)
 pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32")]
         fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t;
     }
@@ -2185,7 +2185,7 @@ vcvt_n_f32_s32_(a, N)
 pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32")]
         fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t;
     }
@@ -2201,7 +2201,7 @@ vcvtq_n_f32_s32_(a, N)
 pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32")]
         fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t;
     }
@@ -2217,7 +2217,7 @@ vcvtq_n_f32_s32_(a, N)
 pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32")]
         fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t;
     }
@@ -2233,7 +2233,7 @@ vcvt_n_f32_u32_(a, N)
 pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32")]
         fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t;
     }
@@ -2249,7 +2249,7 @@ vcvt_n_f32_u32_(a, N)
 pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32")]
         fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t;
     }
@@ -2265,7 +2265,7 @@ vcvtq_n_f32_u32_(a, N)
 pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32")]
         fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t;
     }
@@ -2281,7 +2281,7 @@ vcvtq_n_f32_u32_(a, N)
 pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32")]
         fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t;
     }
@@ -2297,7 +2297,7 @@ vcvt_n_s32_f32_(a, N)
 pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32")]
         fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t;
     }
@@ -2313,7 +2313,7 @@ vcvt_n_s32_f32_(a, N)
 pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32")]
         fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t;
     }
@@ -2329,7 +2329,7 @@ vcvtq_n_s32_f32_(a, N)
 pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32")]
         fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t;
     }
@@ -2345,7 +2345,7 @@ vcvtq_n_s32_f32_(a, N)
 pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32")]
         fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t;
     }
@@ -2361,7 +2361,7 @@ vcvt_n_u32_f32_(a, N)
 pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32")]
         fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t;
     }
@@ -2377,7 +2377,7 @@ vcvt_n_u32_f32_(a, N)
 pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32")]
         fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t;
     }
@@ -2393,7 +2393,7 @@ vcvtq_n_u32_f32_(a, N)
 pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32")]
         fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t;
     }
@@ -2408,7 +2408,7 @@ vcvtq_n_u32_f32_(a, N)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
         fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
@@ -2424,7 +2424,7 @@ vcvt_s32_f32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
         fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
@@ -2440,7 +2440,7 @@ vcvtq_s32_f32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
         fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
@@ -2456,7 +2456,7 @@ vcvt_u32_f32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
         fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
@@ -4842,7 +4842,7 @@ pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i8")]
         fn vqneg_s8_(a: int8x8_t) -> int8x8_t;
@@ -4858,7 +4858,7 @@ vqneg_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v16i8")]
         fn vqnegq_s8_(a: int8x16_t) -> int8x16_t;
@@ -4874,7 +4874,7 @@ vqnegq_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i16")]
         fn vqneg_s16_(a: int16x4_t) -> int16x4_t;
@@ -4890,7 +4890,7 @@ vqneg_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i16")]
         fn vqnegq_s16_(a: int16x8_t) -> int16x8_t;
@@ -4906,7 +4906,7 @@ vqnegq_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i32")]
         fn vqneg_s32_(a: int32x2_t) -> int32x2_t;
@@ -4922,7 +4922,7 @@ vqneg_s32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
 pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i32")]
         fn vqnegq_s32_(a: int32x4_t) -> int32x4_t;
@@ -4938,7 +4938,7 @@ vqnegq_s32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")]
         fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -4954,7 +4954,7 @@ vqsub_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")]
         fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -4970,7 +4970,7 @@ vqsubq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")]
         fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -4986,7 +4986,7 @@ vqsub_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")]
         fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -5002,7 +5002,7 @@ vqsubq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")]
         fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -5018,7 +5018,7 @@ vqsub_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")]
         fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -5034,7 +5034,7 @@ vqsubq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
         fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
@@ -5050,7 +5050,7 @@ vqsub_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
 pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
         fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
@@ -5066,7 +5066,7 @@ vqsubq_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")]
         fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -5082,7 +5082,7 @@ vqsub_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")]
         fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -5098,7 +5098,7 @@ vqsubq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")]
         fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -5114,7 +5114,7 @@ vqsub_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")]
         fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -5130,7 +5130,7 @@ vqsubq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")]
         fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -5146,7 +5146,7 @@ vqsub_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")]
         fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -5162,7 +5162,7 @@ vqsubq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
         fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
@@ -5178,7 +5178,7 @@ vqsub_s64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
 pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
         fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
@@ -5194,7 +5194,7 @@ vqsubq_s64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")]
         fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -5210,7 +5210,7 @@ vhadd_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")]
         fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -5226,7 +5226,7 @@ vhaddq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")]
         fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -5242,7 +5242,7 @@ vhadd_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")]
         fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -5258,7 +5258,7 @@ vhaddq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")]
         fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -5274,7 +5274,7 @@ vhadd_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
 pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")]
         fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -5290,7 +5290,7 @@ vhaddq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")]
         fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -5306,7 +5306,7 @@ vhadd_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")]
         fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -5322,7 +5322,7 @@ vhaddq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")]
         fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -5338,7 +5338,7 @@ vhadd_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")]
         fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -5354,7 +5354,7 @@ vhaddq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")]
         fn vhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -5370,7 +5370,7 @@ vhadd_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
 pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")]
         fn vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -5386,7 +5386,7 @@ vhaddq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i8")]
         fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -5402,7 +5402,7 @@ vrhadd_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v16i8")]
         fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -5418,7 +5418,7 @@ vrhaddq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i16")]
         fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -5434,7 +5434,7 @@ vrhadd_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i16")]
         fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -5450,7 +5450,7 @@ vrhaddq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v2i32")]
         fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -5466,7 +5466,7 @@ vrhadd_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
 pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i32")]
         fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -5482,7 +5482,7 @@ vrhaddq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i8")]
         fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -5498,7 +5498,7 @@ vrhadd_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v16i8")]
         fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -5514,7 +5514,7 @@ vrhaddq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i16")]
         fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -5530,7 +5530,7 @@ vrhadd_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i16")]
         fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -5546,7 +5546,7 @@ vrhaddq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v2i32")]
         fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -5562,7 +5562,7 @@ vrhadd_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
 pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i32")]
         fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -5578,7 +5578,7 @@ vrhaddq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
 pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
         fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
@@ -5594,7 +5594,7 @@ vrndn_f32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
 pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
         fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
@@ -5610,7 +5610,7 @@ vrndnq_f32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i8")]
         fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -5626,7 +5626,7 @@ vqadd_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v16i8")]
         fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -5642,7 +5642,7 @@ vqaddq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i16")]
         fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -5658,7 +5658,7 @@ vqadd_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")]
         fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -5674,7 +5674,7 @@ vqaddq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")]
         fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -5690,7 +5690,7 @@ vqadd_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")]
         fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -5706,7 +5706,7 @@ vqaddq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")]
         fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
@@ -5722,7 +5722,7 @@ vqadd_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
 pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")]
         fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
@@ -5738,7 +5738,7 @@ vqaddq_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")]
         fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -5754,7 +5754,7 @@ vqadd_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v16i8")]
         fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -5770,7 +5770,7 @@ vqaddq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")]
         fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -5786,7 +5786,7 @@ vqadd_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")]
         fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -5802,7 +5802,7 @@ vqaddq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")]
         fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -5818,7 +5818,7 @@ vqadd_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")]
         fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -5834,7 +5834,7 @@ vqaddq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")]
         fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
@@ -5850,7 +5850,7 @@ vqadd_s64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
 pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")]
         fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
@@ -6826,7 +6826,7 @@ pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
 pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")]
         fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
@@ -6842,7 +6842,7 @@ vmul_p8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
 pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v16i8")]
         fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
@@ -7218,7 +7218,7 @@ pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
 pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")]
         fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t;
@@ -7234,7 +7234,7 @@ vmull_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
 pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")]
         fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
@@ -7250,7 +7250,7 @@ vmull_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
 pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")]
         fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
@@ -7266,7 +7266,7 @@ vmull_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
 pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")]
         fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t;
@@ -7282,7 +7282,7 @@ vmull_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
 pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")]
         fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t;
@@ -7298,7 +7298,7 @@ vmull_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
 pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")]
         fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t;
@@ -7314,7 +7314,7 @@ vmull_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))]
 pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")]
         fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
@@ -7466,7 +7466,7 @@ pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) ->
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
         fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
@@ -7482,7 +7482,7 @@ vfma_f32_(b, c, a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")]
         fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
@@ -7872,7 +7872,7 @@ pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> ui
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")]
         fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
@@ -7888,7 +7888,7 @@ vhsub_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")]
         fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
@@ -7904,7 +7904,7 @@ vhsubq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")]
         fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
@@ -7920,7 +7920,7 @@ vhsub_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")]
         fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
@@ -7936,7 +7936,7 @@ vhsubq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")]
         fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
@@ -7952,7 +7952,7 @@ vhsub_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
 pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")]
         fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
@@ -7968,7 +7968,7 @@ vhsubq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")]
         fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -7984,7 +7984,7 @@ vhsub_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")]
         fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -8000,7 +8000,7 @@ vhsubq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")]
         fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -8016,7 +8016,7 @@ vhsub_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")]
         fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -8032,7 +8032,7 @@ vhsubq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")]
         fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -8048,7 +8048,7 @@ vhsub_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))]
 pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")]
         fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -8196,7 +8196,7 @@ pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))]
 pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")]
         fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -8212,7 +8212,7 @@ vmax_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))]
 pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v16i8")]
fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -8228,7 +8228,7 @@ vmaxq_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -8244,7 +8244,7 @@ vmax_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -8260,7 +8260,7 @@ vmaxq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] fn vmax_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -8276,7 +8276,7 @@ vmax_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -8292,7 +8292,7 @@ vmaxq_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; @@ -8308,7 +8308,7 @@ vmax_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; @@ -8324,7 +8324,7 @@ vmaxq_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i16")] fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; @@ -8340,7 +8340,7 @@ vmax_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] #[cfg_attr(target_arch = "aarch64", 
link_name = "llvm.aarch64.neon.umax.v8i16")] fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; @@ -8356,7 +8356,7 @@ vmaxq_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")] fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; @@ -8372,7 +8372,7 @@ vmax_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")] fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; @@ -8388,7 +8388,7 @@ vmaxq_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")] fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; @@ -8404,7 +8404,7 @@ vmax_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")] fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; @@ -8420,7 +8420,7 @@ vmaxq_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; @@ -8436,7 +8436,7 @@ vmaxnm_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; @@ -8452,7 +8452,7 @@ vmaxnmq_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")] fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; @@ -8468,7 +8468,7 @@ vmin_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")] fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -8484,7 +8484,7 @@ vminq_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")] fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -8500,7 +8500,7 @@ vmin_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")] fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -8516,7 +8516,7 @@ vminq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")] fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -8532,7 +8532,7 @@ vmin_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")] fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -8548,7 +8548,7 @@ vminq_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")] fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; @@ -8564,7 +8564,7 @@ vmin_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")] fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; @@ -8580,7 +8580,7 @@ vminq_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")] fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; @@ -8596,7 +8596,7 @@ vmin_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] 
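    // `improper_ctypes` would otherwise fire on these declarations: NEON
    // vector structs are not FFI-safe under the C-ABI rules the lint checks.
    // The allow is sound because nothing here crosses a real C boundary; the
    // signatures only name LLVM intrinsics.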
- extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")] fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; @@ -8612,7 +8612,7 @@ vminq_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")] fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; @@ -8628,7 +8628,7 @@ vmin_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")] fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; @@ -8644,7 +8644,7 @@ vminq_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")] fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; @@ -8660,7 +8660,7 @@ vmin_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")] fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; @@ -8676,7 +8676,7 @@ vminq_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; @@ -8692,7 +8692,7 @@ vminnm_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; @@ -8708,7 +8708,7 @@ vminnmq_f32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")] fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; @@ -8724,7 +8724,7 @@ vqdmull_s16_(a, b) #[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(sqdmull))] pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")] fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; @@ -8914,7 +8914,7 @@ pub unsafe fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int3 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")] fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -8930,7 +8930,7 @@ vqdmulh_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")] fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -8946,7 +8946,7 @@ vqdmulhq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")] fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -8962,7 +8962,7 @@ vqdmulh_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")] fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -9022,7 +9022,7 @@ pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v8i8")] fn vqmovn_s16_(a: int16x8_t) -> int8x8_t; @@ -9038,7 +9038,7 @@ vqmovn_s16_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v4i16")] fn vqmovn_s32_(a: int32x4_t) -> int16x4_t; @@ -9054,7 +9054,7 @@ vqmovn_s32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))] pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] #[cfg_attr(target_arch = "aarch64", 
link_name = "llvm.aarch64.neon.sqxtn.v2i32")] fn vqmovn_s64_(a: int64x2_t) -> int32x2_t; @@ -9070,7 +9070,7 @@ vqmovn_s64_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v8i8")] fn vqmovn_u16_(a: uint16x8_t) -> uint8x8_t; @@ -9086,7 +9086,7 @@ vqmovn_u16_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v4i16")] fn vqmovn_u32_(a: uint32x4_t) -> uint16x4_t; @@ -9102,7 +9102,7 @@ vqmovn_u32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")] fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; @@ -9118,7 +9118,7 @@ vqmovn_u64_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v8i8")] fn vqmovun_s16_(a: int16x8_t) -> uint8x8_t; @@ -9134,7 +9134,7 @@ vqmovun_s16_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v4i16")] fn vqmovun_s32_(a: int32x4_t) -> uint16x4_t; @@ -9150,7 +9150,7 @@ vqmovun_s32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v2i32")] fn vqmovun_s64_(a: int64x2_t) -> uint32x2_t; @@ -9166,7 +9166,7 @@ vqmovun_s64_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i16")] fn vqrdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -9182,7 +9182,7 @@ vqrdmulh_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v8i16")] fn 
vqrdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -9198,7 +9198,7 @@ vqrdmulhq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v2i32")] fn vqrdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -9214,7 +9214,7 @@ vqrdmulh_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i32")] fn vqrdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -9646,7 +9646,7 @@ pub unsafe fn vqrdmlshq_laneq_s32(a: int32x4_t, b: int32x4_t, c #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")] fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; @@ -9662,7 +9662,7 @@ vqrshl_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")] fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -9678,7 +9678,7 @@ vqrshlq_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")] fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -9694,7 +9694,7 @@ vqrshl_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")] fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -9710,7 +9710,7 @@ vqrshlq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")] fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -9726,7 +9726,7 @@ vqrshl_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - 
extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")] fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -9742,7 +9742,7 @@ vqrshlq_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")] fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; @@ -9758,7 +9758,7 @@ vqrshl_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")] fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; @@ -9774,7 +9774,7 @@ vqrshlq_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")] fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; @@ -9790,7 +9790,7 @@ vqrshl_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")] fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; @@ -9806,7 +9806,7 @@ vqrshlq_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")] fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; @@ -9822,7 +9822,7 @@ vqrshl_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")] fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; @@ -9838,7 +9838,7 @@ vqrshlq_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")] fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; @@ -9854,7 +9854,7 @@ vqrshl_u32_(a, b) 
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")] fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; @@ -9870,7 +9870,7 @@ vqrshlq_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")] fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; @@ -9886,7 +9886,7 @@ vqrshl_u64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")] fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; @@ -9903,7 +9903,7 @@ vqrshlq_u64_(a, b) pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; } @@ -9919,7 +9919,7 @@ vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i1 pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")] fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; } @@ -9935,7 +9935,7 @@ vqrshrn_n_s16_(a, N) pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; } @@ -9951,7 +9951,7 @@ vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")] fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; } @@ -9967,7 +9967,7 @@ vqrshrn_n_s32_(a, N) pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; } @@ -9983,7 +9983,7 @@ vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.sqrshrn.v2i32")] fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; } @@ -9999,7 +9999,7 @@ vqrshrn_n_s64_(a, N) pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; } @@ -10015,7 +10015,7 @@ vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")] fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; } @@ -10031,7 +10031,7 @@ vqrshrn_n_u16_(a, N) pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; } @@ -10047,7 +10047,7 @@ vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")] fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; } @@ -10063,7 +10063,7 @@ vqrshrn_n_u32_(a, N) pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; } @@ -10079,7 +10079,7 @@ vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64)) pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")] fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; } @@ -10095,7 +10095,7 @@ vqrshrn_n_u64_(a, N) pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; } @@ -10111,7 +10111,7 @@ vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")] fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; } @@ -10127,7 +10127,7 @@ vqrshrun_n_s16_(a, N) pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; } @@ -10143,7 +10143,7 @@ 
vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")] fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; } @@ -10159,7 +10159,7 @@ vqrshrun_n_s32_(a, N) pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; } @@ -10175,7 +10175,7 @@ vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; } @@ -10190,7 +10190,7 @@ vqrshrun_n_s64_(a, N) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; @@ -10206,7 +10206,7 @@ vqshl_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -10222,7 +10222,7 @@ vqshlq_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -10238,7 +10238,7 @@ vqshl_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -10254,7 +10254,7 @@ vqshlq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -10270,7 +10270,7 @@ vqshl_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn 
vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -10286,7 +10286,7 @@ vqshlq_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; @@ -10302,7 +10302,7 @@ vqshl_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; @@ -10318,7 +10318,7 @@ vqshlq_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; @@ -10334,7 +10334,7 @@ vqshl_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; @@ -10350,7 +10350,7 @@ vqshlq_u8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; @@ -10366,7 +10366,7 @@ vqshl_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; @@ -10382,7 +10382,7 @@ vqshlq_u16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; @@ 
-10398,7 +10398,7 @@ vqshl_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; @@ -10414,7 +10414,7 @@ vqshlq_u32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")] fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; @@ -10430,7 +10430,7 @@ vqshl_u64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")] fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; @@ -10639,7 +10639,7 @@ pub unsafe fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; } @@ -10655,7 +10655,7 @@ vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16 pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")] fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; } @@ -10671,7 +10671,7 @@ vqshrn_n_s16_(a, N) pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; } @@ -10687,7 +10687,7 @@ vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")] fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; } @@ -10703,7 +10703,7 @@ vqshrn_n_s32_(a, N) pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; } @@ -10719,7 +10719,7 @@ vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { 
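        // On AArch64 the shift amount is a true immediate: the const generic
        // N has already been range-checked by `static_assert!` and is passed
        // to the intrinsic as a plain `i32`. The ARM lowering in the previous
        // hunk instead splats `-N` into a vector operand, matching the
        // llvm.arm.neon.vqshiftns interface.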
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")] fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; } @@ -10735,7 +10735,7 @@ vqshrn_n_s64_(a, N) pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; } @@ -10751,7 +10751,7 @@ vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u1 pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")] fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; } @@ -10767,7 +10767,7 @@ vqshrn_n_u16_(a, N) pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; } @@ -10783,7 +10783,7 @@ vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")] fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; } @@ -10799,7 +10799,7 @@ vqshrn_n_u32_(a, N) pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; } @@ -10815,7 +10815,7 @@ vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64)) pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")] fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; } @@ -10831,7 +10831,7 @@ vqshrn_n_u64_(a, N) pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; } @@ -10847,7 +10847,7 @@ vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i1 pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { static_assert!(N : i32 where N >= 1 && N <= 8); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")] fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; } @@ -10863,7 +10863,7 @@ vqshrun_n_s16_(a, N) pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; } @@ 
-10879,7 +10879,7 @@ vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { static_assert!(N : i32 where N >= 1 && N <= 16); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")] fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; } @@ -10895,7 +10895,7 @@ vqshrun_n_s32_(a, N) pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; } @@ -10911,7 +10911,7 @@ vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { static_assert!(N : i32 where N >= 1 && N <= 32); #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")] fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; } @@ -10926,7 +10926,7 @@ vqshrun_n_s64_(a, N) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")] fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t; @@ -10942,7 +10942,7 @@ vrsqrte_f32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")] fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t; @@ -10958,7 +10958,7 @@ vrsqrteq_f32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))] pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")] fn vrecpe_f32_(a: float32x2_t) -> float32x2_t; @@ -10974,7 +10974,7 @@ vrecpe_f32_(a) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))] pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")] fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t; @@ -13190,7 +13190,7 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")] fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; @@ -13206,7 +13206,7 @@ vrshl_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn 
vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")] fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; @@ -13222,7 +13222,7 @@ vrshlq_s8_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")] fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -13238,7 +13238,7 @@ vrshl_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")] fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; @@ -13254,7 +13254,7 @@ vrshlq_s16_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")] fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; @@ -13270,7 +13270,7 @@ vrshl_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")] fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; @@ -13286,7 +13286,7 @@ vrshlq_s32_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")] fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; @@ -13302,7 +13302,7 @@ vrshl_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")] fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; @@ -13318,7 +13318,7 @@ vrshlq_s64_(a, b) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] - extern "C" { + extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")] fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; @@ -13334,7 
@@ -13334,7 +13334,7 @@ vrshl_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")]
         fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
@@ -13350,7 +13350,7 @@ vrshlq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")]
         fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
@@ -13366,7 +13366,7 @@ vrshl_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")]
         fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
@@ -13382,7 +13382,7 @@ vrshlq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")]
         fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
@@ -13398,7 +13398,7 @@ vrshl_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")]
         fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
@@ -13414,7 +13414,7 @@ vrshlq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")]
         fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
@@ -13430,7 +13430,7 @@ vrshl_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
 pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")]
         fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
@@ -13639,7 +13639,7 @@ pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
 pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
         fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
     }
@@ -13655,7 +13655,7 @@ vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16
 pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")]
         fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
     }
@@ -13671,7 +13671,7 @@ vrshrn_n_s16_(a, N)
 pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
         fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
     }
@@ -13687,7 +13687,7 @@ vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")]
         fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
     }
@@ -13703,7 +13703,7 @@ vrshrn_n_s32_(a, N)
 pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
         fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
     }
@@ -13719,7 +13719,7 @@ vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")]
         fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
     }
@@ -14250,7 +14250,7 @@ pub unsafe fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")]
         fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
@@ -14266,7 +14266,7 @@ vshl_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")]
         fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
@@ -14282,7 +14282,7 @@ vshlq_s8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")]
         fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
@@ -14298,7 +14298,7 @@ vshl_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")]
         fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
@@ -14314,7 +14314,7 @@ vshlq_s16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")]
         fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
@@ -14330,7 +14330,7 @@ vshl_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")]
         fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
@@ -14346,7 +14346,7 @@ vshlq_s32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")]
         fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
@@ -14362,7 +14362,7 @@ vshl_s64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
 pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")]
         fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
@@ -14378,7 +14378,7 @@ vshlq_s64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")]
         fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
@@ -14394,7 +14394,7 @@ vshl_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")]
         fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
@@ -14410,7 +14410,7 @@ vshlq_u8_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")]
         fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
@@ -14426,7 +14426,7 @@ vshl_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")]
         fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
@@ -14442,7 +14442,7 @@ vshlq_u16_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")]
         fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
@@ -14458,7 +14458,7 @@ vshl_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")]
         fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
@@ -14474,7 +14474,7 @@ vshlq_u32_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")]
         fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
@@ -14490,7 +14490,7 @@ vshl_u64_(a, b)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
 pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")]
         fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
@@ -15295,7 +15295,7 @@ pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")]
         fn vqabs_s8_(a: int8x8_t) -> int8x8_t;
@@ -15311,7 +15311,7 @@ vqabs_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")]
         fn vqabsq_s8_(a: int8x16_t) -> int8x16_t;
@@ -15327,7 +15327,7 @@ vqabsq_s8_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")]
         fn vqabs_s16_(a: int16x4_t) -> int16x4_t;
@@ -15343,7 +15343,7 @@ vqabs_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")]
         fn vqabsq_s16_(a: int16x8_t) -> int16x8_t;
@@ -15359,7 +15359,7 @@ vqabsq_s16_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")]
         fn vqabs_s32_(a: int32x2_t) -> int32x2_t;
@@ -15375,7 +15375,7 @@ vqabs_s32_(a)
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
 pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
-    extern "C" {
+    extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
         #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")]
         fn vqabsq_s32_(a: int32x4_t) -> int32x4_t;
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index be8c8ba6bb..493eb05a24 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -51,142 +51,34 @@ const FLOAT_TYPES_64: [&str; 2] = [
 ];
 
 fn type_len(t: &str) -> usize {
-    match t {
-        "int8x8_t" => 8,
-        "int8x16_t" => 16,
-        "int16x4_t" => 4,
-        "int16x8_t" => 8,
-        "int32x2_t" => 2,
-        "int32x4_t" => 4,
-        "int64x1_t" => 1,
-        "int64x2_t" => 2,
-        "uint8x8_t" => 8,
-        "uint8x16_t" => 16,
-        "uint16x4_t" => 4,
-        "uint16x8_t" => 8,
-        "uint32x2_t" => 2,
-        "uint32x4_t" => 4,
-        "uint64x1_t" => 1,
-        "uint64x2_t" => 2,
-        "float16x4_t" => 4,
-        "float16x8_t" => 8,
-        "float32x2_t" => 2,
-        "float32x4_t" => 4,
-        "float64x1_t" => 1,
-        "float64x2_t" => 2,
-        "poly8x8_t" => 8,
-        "poly8x16_t" => 16,
-        "poly16x4_t" => 4,
-        "poly16x8_t" => 8,
-        "poly64x1_t" => 1,
-        "poly64x2_t" => 2,
-        "int8x8x2_t" => 16,
-        "int8x8x3_t" => 24,
-        "int8x8x4_t" => 32,
-        "int16x4x2_t" => 8,
-        "int16x4x3_t" => 12,
-        "int16x4x4_t" => 16,
-        "int32x2x2_t" => 4,
-        "int32x2x3_t" => 6,
-        "int32x2x4_t" => 8,
-        "int64x1x2_t" => 2,
-        "int64x1x3_t" => 3,
-        "int64x1x4_t" => 4,
-        "uint8x8x2_t" => 16,
-        "uint8x8x3_t" => 24,
-        "uint8x8x4_t" => 32,
-        "uint16x4x2_t" => 8,
-        "uint16x4x3_t" => 12,
-        "uint16x4x4_t" => 16,
-        "uint32x2x2_t" => 4,
-        "uint32x2x3_t" => 6,
-        "uint32x2x4_t" => 8,
-        "uint64x1x2_t" => 2,
-        "uint64x1x3_t" => 3,
-        "uint64x1x4_t" => 4,
-        "poly8x8x2_t" => 16,
-        "poly8x8x3_t" => 24,
-        "poly8x8x4_t" => 32,
-        "poly16x4x2_t" => 8,
-        "poly16x4x3_t" => 12,
-        "poly16x4x4_t" => 16,
-        "poly64x1x2_t" => 2,
-        "poly64x1x3_t" => 3,
-        "poly64x1x4_t" => 4,
-        "float32x2x2_t" => 4,
-        "float32x2x3_t" => 6,
-        "float32x2x4_t" => 8,
-        "float64x1x2_t" => 2,
-        "float64x1x3_t" => 3,
-        "float64x1x4_t" => 4,
-        "int8x16x2_t" => 32,
-        "int8x16x3_t" => 48,
-        "int8x16x4_t" => 64,
-        "int16x8x2_t" => 16,
-        "int16x8x3_t" => 24,
-        "int16x8x4_t" => 32,
-        "int32x4x2_t" => 8,
-        "int32x4x3_t" => 12,
-        "int32x4x4_t" => 16,
-        "int64x2x2_t" => 4,
-        "int64x2x3_t" => 6,
-        "int64x2x4_t" => 8,
-        "uint8x16x2_t" => 32,
-        "uint8x16x3_t" => 48,
-        "uint8x16x4_t" => 64,
-        "uint16x8x2_t" => 16,
-        "uint16x8x3_t" => 24,
-        "uint16x8x4_t" => 32,
-        "uint32x4x2_t" => 8,
-        "uint32x4x3_t" => 12,
-        "uint32x4x4_t" => 16,
-        "uint64x2x2_t" => 4,
-        "uint64x2x3_t" => 6,
-        "uint64x2x4_t" => 8,
-        "poly8x16x2_t" => 32,
-        "poly8x16x3_t" => 48,
-        "poly8x16x4_t" => 64,
-        "poly16x8x2_t" => 16,
-        "poly16x8x3_t" => 24,
-        "poly16x8x4_t" => 32,
-        "poly64x2x2_t" => 4,
-        "poly64x2x3_t" => 6,
-        "poly64x2x4_t" => 8,
-        "float32x4x2_t" => 8,
-        "float32x4x3_t" => 12,
-        "float32x4x4_t" => 16,
-        "float64x2x2_t" => 4,
-        "float64x2x3_t" => 6,
-        "float64x2x4_t" => 8,
-        "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8"
-        | "p16" | "p64" | "p128" => 1,
-        _ => panic!("unknown type: {}", t),
+    let s: Vec<_> = t.split("x").collect();
+    if s.len() == 2 {
+        match &s[1][0..2] {
+            "1_" => 1,
+            "2_" => 2,
+            "4_" => 4,
+            "8_" => 8,
+            "16" => 16,
+            _ => panic!("unknown type: {}", t),
+        }
+    } else if s.len() == 3 {
+        s[1].parse::<usize>().unwrap() * type_sub_len(t)
+    } else {
+        1
     }
 }
 
 fn type_sub_len(t: &str) -> usize {
-    match t {
-        "int8x8_t" | "int16x4_t" | "int32x2_t" | "int64x1_t" | "uint8x8_t" | "uint16x4_t"
-        | "uint32x2_t" | "uint64x1_t" => 1,
-        "int8x8x2_t" | "int16x4x2_t" | "int32x2x2_t" | "int64x1x2_t" | "uint8x8x2_t"
-        | "uint16x4x2_t" | "uint32x2x2_t" | "uint64x1x2_t" | "poly8x8x2_t" | "poly16x4x2_t"
-        | "poly64x1x2_t" | "float32x2x2_t" | "float64x1x2_t" | "int8x16x2_t" | "int16x8x2_t"
-        | "int32x4x2_t" | "int64x2x2_t" | "uint8x16x2_t" | "uint16x8x2_t" | "uint32x4x2_t"
-        | "uint64x2x2_t" | "poly8x16x2_t" | "poly16x8x2_t" | "poly64x2x2_t" | "float32x4x2_t"
-        | "float64x2x2_t" => 2,
-        "int8x8x3_t" | "int16x4x3_t" | "int32x2x3_t" | "int64x1x3_t" | "uint8x8x3_t"
-        | "uint16x4x3_t" | "uint32x2x3_t" | "uint64x1x3_t" | "poly8x8x3_t" | "poly16x4x3_t"
-        | "poly64x1x3_t" | "float32x2x3_t" | "float64x1x3_t" | "int8x16x3_t" | "int16x8x3_t"
-        | "int32x4x3_t" | "int64x2x3_t" | "uint8x16x3_t" | "uint16x8x3_t" | "uint32x4x3_t"
-        | "uint64x2x3_t" | "poly8x16x3_t" | "poly16x8x3_t" | "poly64x2x3_t" | "float32x4x3_t"
-        | "float64x2x3_t" => 3,
-        "int8x8x4_t" | "int16x4x4_t" | "int32x2x4_t" | "int64x1x4_t" | "uint8x8x4_t"
-        | "uint16x4x4_t" | "uint32x2x4_t" | "uint64x1x4_t" | "poly8x8x4_t" | "poly16x4x4_t"
-        | "poly64x1x4_t" | "float32x2x4_t" | "float64x1x4_t" | "int8x16x4_t" | "int16x8x4_t"
-        | "int32x4x4_t" | "int64x2x4_t" | "uint8x16x4_t" | "uint16x8x4_t" | "uint32x4x4_t"
-        | "uint64x2x4_t" | "poly8x16x4_t" | "poly16x8x4_t" | "poly64x2x4_t" | "float32x4x4_t"
-        | "float64x2x4_t" => 4,
-        _ => panic!("unknown type: {}", t),
+    let s: Vec<_> = t.split('x').collect();
+    if s.len() != 3 {
+        1
+    } else {
+        match s[2] {
+            "2_t" => 2,
+            "3_t" => 3,
+            "4_t" => 4,
+            _ => panic!("unknown type len: {}", t),
+        }
     }
 }
@@ -1157,19 +1049,13 @@ fn gen_aarch64(
         link.push_str(&link_aarch64);
         link.replace("_EXT_", ext).replace("_EXT2_", ext2)
     };
-    let abi = if test_fn != "normal" {
-        "unadjusted"
-    } else {
-        "C"
-    };
     ext_c = format!(
         r#"#[allow(improper_ctypes)]
-    extern "{}" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
     "#,
-        abi,
         link_aarch64,
         current_fn,
         match para_num {
@@ -1189,7 +1075,7 @@ fn gen_aarch64(
     if const_aarch64.is_some() {
         ext_c_const = format!(
             r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
@@ -1737,21 +1623,15 @@ fn gen_arm(
         link.push_str(&link_aarch64);
         link.replace("_EXT_", ext).replace("_EXT2_", ext2)
     };
-    let abi = if test_fn == "load_test" {
-        "unadjusted"
-    } else {
-        "C"
-    };
     if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
         ext_c = format!(
             r#"#[allow(improper_ctypes)]
-    extern "{}" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "arm", link_name = "{}")]
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
     "#,
-            abi,
            link_arm,
            link_aarch64,
            current_fn,
@@ -1782,7 +1662,7 @@ fn gen_arm(
     };
     ext_c_arm.push_str(&format!(
         r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "arm", link_name = "{}")]
         fn {}({}) -> {};
     }}
@@ -1810,7 +1690,7 @@ fn gen_arm(
     if out_t != link_arm_t[3] {
         ext_c_arm.push_str(&format!(
             r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "arm", link_name = "{}")]
         fn {}({}) -> {};
     }}
@@ -1838,7 +1718,7 @@ fn gen_arm(
     if const_aarch64.is_some() {
         ext_c_aarch64.push_str(&format!(
             r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
@@ -1863,7 +1743,7 @@ fn gen_arm(
     if out_t != link_aarch64_t[3] {
         ext_c_aarch64.push_str(&format!(
             r#"#[allow(improper_ctypes)]
-    extern "C" {{
+    extern "unadjusted" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}

From fb43b366a0fed8c5d5abb44c987fe46880d6bfe4 Mon Sep 17 00:00:00 2001
From: SparrowLii
Date: Mon, 23 Aug 2021 14:57:11 +0800
Subject: [PATCH 4/7] correct instr_assert

---
 crates/core_arch/src/arm_shared/neon/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs
index 118db0f31f..58b21c8316 100644
--- a/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -3117,7 +3117,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 1))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
 // Based on the discussion in https://github.com/rust-lang/stdarch/pull/792
 // `mov` seems to be an acceptable intrinsic to compile to
@@ -3242,7 +3242,7 @@ pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 0))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
 pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
     static_assert_imm1!(IMM5);
@@ -3266,7 +3266,7 @@ pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 0))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
 pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
     static_assert_imm1!(IMM5);

From 2dae81af9474869d19f61ffe6f6cf774d20e57f9 Mon Sep 17 00:00:00 2001
From: SparrowLii
Date: Mon, 23 Aug 2021 15:14:42 +0800
Subject: [PATCH 5/7] correct Abi from "C" to "unadjusted" for arm/aarch64

---
 crates/core_arch/src/aarch64/crc.rs            |  2 +-
 crates/core_arch/src/aarch64/neon/mod.rs       |  2 +-
 crates/core_arch/src/aarch64/prefetch.rs       |  2 +-
 crates/core_arch/src/aarch64/tme.rs            |  2 +-
 crates/core_arch/src/arm/dsp.rs                |  2 +-
 crates/core_arch/src/arm/ex.rs                 | 14 +++++++-------
 crates/core_arch/src/arm/mod.rs                |  2 +-
 crates/core_arch/src/arm/neon.rs               |  2 +-
 crates/core_arch/src/arm/simd32.rs             |  2 +-
 crates/core_arch/src/arm_shared/barrier/mod.rs |  2 +-
 crates/core_arch/src/arm_shared/crc.rs         |  2 +-
 crates/core_arch/src/arm_shared/crypto.rs      |  2 +-
 crates/core_arch/src/arm_shared/hints.rs       |  2 +-
 crates/core_arch/src/arm_shared/neon/mod.rs    |  2 +-
 14 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/crates/core_arch/src/aarch64/crc.rs b/crates/core_arch/src/aarch64/crc.rs
index c19d61ca73..6e8128534b 100644
--- a/crates/core_arch/src/aarch64/crc.rs
+++ b/crates/core_arch/src/aarch64/crc.rs
@@ -1,4 +1,4 @@
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.aarch64.crc32x"]
     fn crc32x_(crc: u32, data: u64) -> u32;
 
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 09f67edaaa..686eb9446a 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -56,7 +56,7 @@ pub struct float64x2x4_t(
 );
 
 #[allow(improper_ctypes)]
-extern "C" {
+extern "unadjusted" {
     // absolute value
     #[link_name = "llvm.aarch64.neon.abs.i64"]
     fn vabsd_s64_(a: i64) -> i64;
diff --git a/crates/core_arch/src/aarch64/prefetch.rs b/crates/core_arch/src/aarch64/prefetch.rs
index 02b3971aa4..687c3f39a0 100644
--- a/crates/core_arch/src/aarch64/prefetch.rs
+++ b/crates/core_arch/src/aarch64/prefetch.rs
@@ -1,7 +1,7 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.prefetch"]
     fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
 }
diff --git a/crates/core_arch/src/aarch64/tme.rs b/crates/core_arch/src/aarch64/tme.rs
index edf87b8034..d1b2cf334d 100644
--- a/crates/core_arch/src/aarch64/tme.rs
+++ b/crates/core_arch/src/aarch64/tme.rs
@@ -17,7 +17,7 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.aarch64.tstart"]
     fn aarch64_tstart() -> u64;
     #[link_name = "llvm.aarch64.tcommit"]
diff --git a/crates/core_arch/src/arm/dsp.rs b/crates/core_arch/src/arm/dsp.rs
index 7039f0351d..6720f97a53 100644
--- a/crates/core_arch/src/arm/dsp.rs
+++ b/crates/core_arch/src/arm/dsp.rs
@@ -32,7 +32,7 @@ types! {
     pub struct uint16x2_t(u16, u16);
 }
 
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.arm.smulbb"]
     fn arm_smulbb(a: i32, b: i32) -> i32;
 
diff --git a/crates/core_arch/src/arm/ex.rs b/crates/core_arch/src/arm/ex.rs
index b9d5047a05..654e07b3af 100644
--- a/crates/core_arch/src/arm/ex.rs
+++ b/crates/core_arch/src/arm/ex.rs
@@ -11,7 +11,7 @@
     doc
 ))]
 pub unsafe fn __clrex() {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.clrex"]
         fn clrex();
     }
@@ -27,7 +27,7 @@ pub unsafe fn __clrex() {
     doc
 ))]
 pub unsafe fn __ldrexb(p: *const u8) -> u8 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.ldrex.p0i8"]
         fn ldrex8(p: *const u8) -> u32;
     }
@@ -43,7 +43,7 @@ pub unsafe fn __ldrexb(p: *const u8) -> u8 {
     doc
 ))]
 pub unsafe fn __ldrexh(p: *const u16) -> u16 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.ldrex.p0i16"]
         fn ldrex16(p: *const u16) -> u32;
     }
@@ -60,7 +60,7 @@ pub unsafe fn __ldrexh(p: *const u16) -> u16 {
     doc
 ))]
 pub unsafe fn __ldrex(p: *const u32) -> u32 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.ldrex.p0i32"]
         fn ldrex32(p: *const u32) -> u32;
     }
@@ -78,7 +78,7 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 {
     doc
 ))]
 pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.strex.p0i8"]
         fn strex8(value: u32, addr: *mut u8) -> u32;
     }
@@ -97,7 +97,7 @@ pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
     doc
 ))]
 pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.strex.p0i16"]
         fn strex16(value: u32, addr: *mut u16) -> u32;
     }
@@ -116,7 +116,7 @@ pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
     doc
 ))]
 pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 {
-    extern "C" {
+    extern "unadjusted" {
         #[link_name = "llvm.arm.strex.p0i32"]
         fn strex32(value: u32, addr: *mut u32) -> u32;
     }
diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs
index d6b12b8292..3c56ec7b1e 100644
--- a/crates/core_arch/src/arm/mod.rs
+++ b/crates/core_arch/src/arm/mod.rs
@@ -107,7 +107,7 @@ pub unsafe fn __dbg<const IMM4: i32>() {
     dbg(IMM4);
 }
 
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.arm.dbg"]
     fn dbg(_: i32);
 }
diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs
index 473c753fd6..cf3b16f9a0 100644
--- a/crates/core_arch/src/arm/neon.rs
+++ b/crates/core_arch/src/arm/neon.rs
@@ -12,7 +12,7 @@ pub(crate) type p8 = u8;
 pub(crate) type p16 = u16;
 
 #[allow(improper_ctypes)]
-extern "C" {
+extern "unadjusted" {
     #[link_name = "llvm.arm.neon.vbsl.v8i8"]
     fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
     #[link_name = "llvm.arm.neon.vbsl.v16i8"]
diff --git a/crates/core_arch/src/arm/simd32.rs b/crates/core_arch/src/arm/simd32.rs
index 5cae2fc2aa..2d867acc83 100644
--- a/crates/core_arch/src/arm/simd32.rs
+++ b/crates/core_arch/src/arm/simd32.rs
@@ -80,7 +80,7 @@ macro_rules! dsp_call {
dsp_call { }; } -extern "C" { +extern "unadjusted" { #[link_name = "llvm.arm.qadd8"] fn arm_qadd8(a: i32, b: i32) -> i32; diff --git a/crates/core_arch/src/arm_shared/barrier/mod.rs b/crates/core_arch/src/arm_shared/barrier/mod.rs index b3cbf44d27..6ccced00e3 100644 --- a/crates/core_arch/src/arm_shared/barrier/mod.rs +++ b/crates/core_arch/src/arm_shared/barrier/mod.rs @@ -122,7 +122,7 @@ where arg.__isb() } -extern "C" { +extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")] fn dmb(_: i32); diff --git a/crates/core_arch/src/arm_shared/crc.rs b/crates/core_arch/src/arm_shared/crc.rs index b1cfbb381b..e0d0fbe356 100644 --- a/crates/core_arch/src/arm_shared/crc.rs +++ b/crates/core_arch/src/arm_shared/crc.rs @@ -1,4 +1,4 @@ -extern "C" { +extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32b")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")] fn crc32b_(crc: u32, data: u32) -> u32; diff --git a/crates/core_arch/src/arm_shared/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs index 4cdebb1da4..56b99088bf 100644 --- a/crates/core_arch/src/arm_shared/crypto.rs +++ b/crates/core_arch/src/arm_shared/crypto.rs @@ -1,7 +1,7 @@ use crate::core_arch::arm_shared::{uint32x4_t, uint8x16_t}; #[allow(improper_ctypes)] -extern "C" { +extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.aese")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")] fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; diff --git a/crates/core_arch/src/arm_shared/hints.rs b/crates/core_arch/src/arm_shared/hints.rs index 3145cde8d5..1d6551e5e7 100644 --- a/crates/core_arch/src/arm_shared/hints.rs +++ b/crates/core_arch/src/arm_shared/hints.rs @@ -80,7 +80,7 @@ pub unsafe fn __nop() { asm!("nop", options(nomem, nostack, preserves_flags)); } -extern "C" { +extern "unadjusted" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] fn hint(_: i32); diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 58b21c8316..ae03d6a6cc 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -363,7 +363,7 @@ pub struct uint64x2x4_t( ); #[allow(improper_ctypes)] -extern "C" { +extern "unadjusted" { // absolute value (64-bit) #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i8")] From b0b2939c8c2295f448bc4610534e11016517f6d0 Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Tue, 24 Aug 2021 14:39:21 +0800 Subject: [PATCH 6/7] replace with "nop" in assert_instr of vget --- crates/core_arch/src/arm_shared/neon/mod.rs | 145 ++++++++++---------- 1 file changed, 70 insertions(+), 75 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index ae03d6a6cc..5bcf68c505 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -3117,11 +3117,8 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 1))] -#[cfg_attr(all(test, 
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
-// Based on the discussion in https://github.com/rust-lang/stdarch/pull/792
-// `mov` seems to be an acceptable intrinsic to compile to
-// #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(vmov, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3132,10 +3129,8 @@ pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
-// FIXME: no 32bit this seems to be turned into two vmov.32 instructions
-// validate correctness
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
     static_assert!(IMM5 : i32 where IMM5 == 0);
     simd_extract(v, 0)
@@ -3146,8 +3141,8 @@ pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3158,8 +3153,8 @@ pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3170,8 +3165,8 @@ pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3182,8 +3177,8 @@ pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3194,8 +3189,8 @@ pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3206,8 +3201,8 @@ pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.f32", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3218,8 +3213,8 @@ pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.f32", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3230,8 +3225,8 @@ pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
     static_assert!(IMM5 : i32 where IMM5 == 0);
     simd_extract(v, IMM5 as u32)
@@ -3242,8 +3237,8 @@ pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3254,8 +3249,8 @@ pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
     static_assert!(IMM5 : i32 where IMM5 == 0);
     simd_extract(v, IMM5 as u32)
@@ -3266,8 +3261,8 @@ pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3278,8 +3273,8 @@ pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3290,8 +3285,8 @@ pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3302,8 +3297,8 @@ pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3314,8 +3309,8 @@ pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3326,8 +3321,8 @@ pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3338,8 +3333,8 @@ pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3350,8 +3345,8 @@ pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3362,8 +3357,8 @@ pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
     static_assert_imm3!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3374,8 +3369,8 @@ pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
     static_assert_imm4!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3386,8 +3381,8 @@ pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
     static_assert_imm4!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3398,8 +3393,8 @@ pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
     static_assert_imm4!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3519,8 +3514,8 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
     simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
@@ -3529,8 +3524,8 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
     simd_shuffle4!(a, a, [0, 1, 2, 3])
 }
@@ -3539,8 +3534,8 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
     simd_shuffle2!(a, a, [0, 1])
 }
@@ -3549,8 +3544,8 @@ pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
     int64x1_t(simd_extract(a, 0))
 }
@@ -3559,8 +3554,8 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
     simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
@@ -3569,8 +3564,8 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
     simd_shuffle4!(a, a, [0, 1, 2, 3])
 }
@@ -3579,8 +3574,8 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
     simd_shuffle2!(a, a, [0, 1])
 }
@@ -3589,8 +3584,8 @@ pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
     uint64x1_t(simd_extract(a, 0))
 }
@@ -3599,8 +3594,8 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
     simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
@@ -3609,8 +3604,8 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
     simd_shuffle4!(a, a, [0, 1, 2, 3])
 }
@@ -3619,8 +3614,8 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))]
 pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
     simd_shuffle2!(a, a, [0, 1])
 }

From 4408d0f1cd89f89113e891ac96604537cc22a8e5 Mon Sep 17 00:00:00 2001
From: SparrowLii
Date: Tue, 24 Aug 2021 20:10:05 +0800
Subject: [PATCH 7/7] simplify assert_instr

---
 crates/core_arch/src/arm_shared/neon/mod.rs | 105 +++++++-------------
 1 file changed, 35 insertions(+), 70 deletions(-)

diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs
index 5bcf68c505..369bf07e18 100644
--- a/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -3117,8 +3117,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3129,8 +3128,7 @@ pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
     static_assert!(IMM5 : i32 where IMM5 == 0);
     simd_extract(v, 0)
@@ -3141,8 +3139,7 @@ pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3153,8 +3150,7 @@ pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3165,8 +3161,7 @@ pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
     static_assert_imm2!(IMM5);
     simd_extract(v, IMM5 as u32)
@@ -3177,8 +3172,7 @@ pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
     static_assert_imm1!(IMM5);
     simd_extract(v, IMM5 as u32)
+3183,7 @@ pub unsafe fn vget_lane_u32(v: uint32x2_t) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] pub unsafe fn vget_lane_s32(v: int32x2_t) -> i32 { static_assert_imm1!(IMM5); simd_extract(v, IMM5 as u32) @@ -3201,8 +3194,7 @@ pub unsafe fn vget_lane_s32(v: int32x2_t) -> i32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] pub unsafe fn vget_lane_f32(v: float32x2_t) -> f32 { static_assert_imm1!(IMM5); simd_extract(v, IMM5 as u32) @@ -3213,8 +3205,7 @@ pub unsafe fn vget_lane_f32(v: float32x2_t) -> f32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 1))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] pub unsafe fn vgetq_lane_f32(v: float32x4_t) -> f32 { static_assert_imm2!(IMM5); simd_extract(v, IMM5 as u32) @@ -3225,8 +3216,7 @@ pub unsafe fn vgetq_lane_f32(v: float32x4_t) -> f32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] pub unsafe fn vget_lane_p64(v: poly64x1_t) -> p64 { static_assert!(IMM5 : i32 where IMM5 == 0); simd_extract(v, IMM5 as u32) @@ -3237,8 +3227,7 @@ pub unsafe fn vget_lane_p64(v: poly64x1_t) -> p64 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] pub unsafe fn vgetq_lane_p64(v: poly64x2_t) -> p64 { static_assert_imm1!(IMM5); simd_extract(v, IMM5 as u32) @@ -3249,8 +3238,7 @@ pub unsafe fn vgetq_lane_p64(v: poly64x2_t) -> p64 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] pub unsafe fn vget_lane_s64(v: int64x1_t) -> i64 { static_assert!(IMM5 : i32 where IMM5 == 0); simd_extract(v, IMM5 as u32) @@ -3261,8 +3249,7 @@ pub unsafe fn vget_lane_s64(v: int64x1_t) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] pub unsafe fn 
vgetq_lane_s64(v: int64x2_t) -> i64 { static_assert_imm1!(IMM5); simd_extract(v, IMM5 as u32) @@ -3273,8 +3260,7 @@ pub unsafe fn vgetq_lane_s64(v: int64x2_t) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_u16(v: uint16x8_t) -> u16 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3285,8 +3271,7 @@ pub unsafe fn vgetq_lane_u16(v: uint16x8_t) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_u32(v: uint32x4_t) -> u32 { static_assert_imm2!(IMM5); simd_extract(v, IMM5 as u32) @@ -3297,8 +3282,7 @@ pub unsafe fn vgetq_lane_u32(v: uint32x4_t) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_s16(v: int16x8_t) -> i16 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3309,8 +3293,7 @@ pub unsafe fn vgetq_lane_s16(v: int16x8_t) -> i16 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_p16(v: poly16x8_t) -> p16 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3321,8 +3304,7 @@ pub unsafe fn vgetq_lane_p16(v: poly16x8_t) -> p16 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_s32(v: int32x4_t) -> i32 { static_assert_imm2!(IMM5); simd_extract(v, IMM5 as u32) @@ -3333,8 +3315,7 @@ pub unsafe fn vgetq_lane_s32(v: int32x4_t) -> i32 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vget_lane_u8(v: uint8x8_t) -> u8 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3345,8 +3326,7 @@ pub unsafe fn vget_lane_u8(v: uint8x8_t) -> u8 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 
2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vget_lane_s8(v: int8x8_t) -> i8 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3357,8 +3337,7 @@ pub unsafe fn vget_lane_s8(v: int8x8_t) -> i8 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vget_lane_p8(v: poly8x8_t) -> p8 { static_assert_imm3!(IMM5); simd_extract(v, IMM5 as u32) @@ -3369,8 +3348,7 @@ pub unsafe fn vget_lane_p8(v: poly8x8_t) -> p8 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_u8(v: uint8x16_t) -> u8 { static_assert_imm4!(IMM5); simd_extract(v, IMM5 as u32) @@ -3381,8 +3359,7 @@ pub unsafe fn vgetq_lane_u8(v: uint8x16_t) -> u8 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_s8(v: int8x16_t) -> i8 { static_assert_imm4!(IMM5); simd_extract(v, IMM5 as u32) @@ -3393,8 +3370,7 @@ pub unsafe fn vgetq_lane_s8(v: int8x16_t) -> i8 { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[rustc_legacy_const_generics(1)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] pub unsafe fn vgetq_lane_p8(v: poly8x16_t) -> p8 { static_assert_imm4!(IMM5); simd_extract(v, IMM5 as u32) @@ -3514,8 +3490,7 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -3524,8 +3499,7 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t { simd_shuffle4!(a, a, [0, 1, 2, 3]) } @@ -3534,8 +3508,7 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t { 
simd_shuffle2!(a, a, [0, 1]) } @@ -3544,8 +3517,7 @@ pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t { int64x1_t(simd_extract(a, 0)) } @@ -3554,8 +3526,7 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -3564,8 +3535,7 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { simd_shuffle4!(a, a, [0, 1, 2, 3]) } @@ -3574,8 +3544,7 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { simd_shuffle2!(a, a, [0, 1]) } @@ -3584,8 +3553,7 @@ pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { uint64x1_t(simd_extract(a, 0)) } @@ -3594,8 +3562,7 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -3604,8 +3571,7 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop))] pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { simd_shuffle4!(a, a, [0, 1, 2, 3]) } @@ -3614,8 +3580,7 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] 
+#[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
     simd_shuffle2!(a, a, [0, 1])
 }
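
For reference, every hunk in this patch applies the same shape of rewrite:
the expected mnemonic is `nop` with the same IMM5 value on both targets
(these lane and low-half extracts are expected to compile away, hence the
`nop` expectation), so the `target_arch` split in the `cfg_attr` predicate
carries no information and the two attributes collapse into one. A minimal
before/after sketch, illustrative only and not an additional hunk of this
patch; note that the quoted ("nop") and bare (nop) spellings name the same
instruction, and `assert_instr` is used with both forms throughout this file:

    // before: one assertion per architecture
    #[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", IMM5 = 2))]
    #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 2))]

    // after: a single assertion, active under cfg(test) on every target
    #[cfg_attr(test, assert_instr(nop, IMM5 = 2))]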