From 2955de991c027343ccf9a33a9ada94c0d589290e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 22 Jan 2025 15:05:37 +0100
Subject: [PATCH 1/5] implement `vec_round` and friends

---
 crates/core_arch/src/s390x/vector.rs | 284 +++++++++++++++++++++++++++
 1 file changed, 284 insertions(+)
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index da9ea0157d..420a8176bb 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -73,6 +73,10 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.umin.v8i16"] fn vmnlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
     #[link_name = "llvm.umin.v4i32"] fn vmnlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
     #[link_name = "llvm.umin.v2i64"] fn vmnlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vfisb"] fn vfisb(a: vector_float, b: i32, c: i32) -> vector_float;
+    #[link_name = "llvm.s390.vfidb"] fn vfidb(a: vector_double, b: i32, c: i32) -> vector_double;
+
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -629,6 +633,71 @@ mod sealed {
     }
 
     impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorRound: Sized {
+        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_roundc(self) -> Self {
+            self.vec_round_impl::<4, 0>()
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_round(self) -> Self {
+            // NOTE: simd_round resoles ties by rounding away from zero,
+            // while the vec_round function rounds towards zero
+            self.vec_round_impl::<4, 4>()
+        }
+
+        // NOTE: vec_roundz (vec_round_impl::<4, 5>) is the same as vec_trunc
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_trunc(self) -> Self {
+            simd_trunc(self)
+        }
+
+        // NOTE: vec_roundp (vec_round_impl::<4, 6>) is the same as vec_ceil
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_ceil(self) -> Self {
+            simd_ceil(self)
+        }
+
+        // NOTE: vec_roundm (vec_round_impl::<4, 7>) is the same as vec_floor
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_floor(self) -> Self {
+            simd_floor(self)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_rint(self) -> Self {
+            self.vec_round_impl::<0, 0>()
+        }
+    }
+
+    // FIXME(vector-enhancements-1) apply the right target feature to all methods
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorRound for vector_float {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
+            vfisb(self, N, MODE)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorRound for vector_double {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
+            vfidb(self, N, MODE)
+        }
+    }
 }
 
 /// Vector element-wise addition.
@@ -843,6 +912,125 @@ where
     a.vec_orc(b)
 }
 
+/// Vector floor.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_floor<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_floor()
+}
+
+/// Vector ceil.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_ceil<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_trunc<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_trunc()
+}
+
+/// Vector round, resolves ties by rounding towards zero.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_round<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_round()
+}
+
+/// Returns a vector by using the current rounding mode to round every
+/// floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundc<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_roundc()
+}
+
+/// Returns a vector containing the largest representable floating-point integral values less
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundm<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    // the IBM docs note
+    //
+    // > vec_roundm provides the same functionality as vec_floor, except that vec_roundm would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_floor` also does not trigger that exception, so both are equivalent
+    a.vec_floor()
+}
+
+/// Returns a vector containing the smallest representable floating-point integral values greater
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundp<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    // the IBM docs note
+    //
+    // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundp would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundz<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    // the IBM docs note
+    //
+    // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent
+    a.vec_trunc()
+}
+
+/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rint<T>(a: T) -> T
+where
+    T: sealed::VectorRound,
+{
+    a.vec_rint()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -852,6 +1040,33 @@ mod tests {
     use crate::core_arch::simd::*;
     use stdarch_test::simd_test;
 
+    macro_rules! test_vec_1 {
+        { $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => {
+            #[simd_test(enable = "vector")]
+            unsafe fn $name() {
+                let a: vector_float = transmute(f32x4::new($($a),+));
+
+                let d: vector_float = transmute(f32x4::new($($d),+));
+                let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON)));
+                let e = m32x4::new(true, true, true, true);
+                assert_eq!(e, r);
+            }
+        };
+        { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($d:expr),+] } => {
+            test_vec_1! { $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] }
+        };
+        { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => {
+            #[simd_test(enable = "vector")]
+            unsafe fn $name() {
+                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
+
+                let d = $ty_out::new($($d),+);
+                let r : $ty_out = transmute($fn(a));
+                assert_eq!(d, r);
+            }
+        }
+    }
+
     macro_rules! test_vec_2 {
         { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
             test_vec_2! { $name, $fn, $ty -> $ty, [$($a),+], [$($b),+], [$($d),+] }
@@ -1059,4 +1274,73 @@ mod tests {
     [0b11001100, 0b11001100, 0b11001100, 0b11001100],
     [0b00110011, 0b11110011, 0b00001100, 0b00000000],
     [!0b11111111, !0b00111111, !0b11000000, !0b11001100] }
+
+    test_vec_1! { test_vec_floor_f32, vec_floor, f32x4,
+        [1.1, 1.9, -0.5, -0.9],
+        [1.0, 1.0, -1.0, -1.0]
+    }
+
+    test_vec_1! { test_vec_floor_f64_1, vec_floor, f64x2,
+        [1.1, 1.9],
+        [1.0, 1.0]
+    }
+    test_vec_1! { test_vec_floor_f64_2, vec_floor, f64x2,
+        [-0.5, -0.9],
+        [-1.0, -1.0]
+    }
+
+    test_vec_1! { test_vec_ceil_f32, vec_ceil, f32x4,
+        [0.1, 0.5, 0.6, 0.9],
+        [1.0, 1.0, 1.0, 1.0]
+    }
+    test_vec_1! { test_vec_ceil_f64_1, vec_ceil, f64x2,
+        [0.1, 0.5],
+        [1.0, 1.0]
+    }
+    test_vec_1! { test_vec_ceil_f64_2, vec_ceil, f64x2,
+        [0.6, 0.9],
+        [1.0, 1.0]
+    }
+
+    // FIXME(vector-enhancements-1)
+    //    test_vec_1! { test_vec_round_f32, vec_round, f32x4,
+    //        [],
+    //        []
+    //    }
+    test_vec_1! { test_vec_round_f64_1, vec_round, f64x2,
+        [0.1, 0.5],
+        [0.0, 0.0]
+    }
+    test_vec_1! { test_vec_round_f64_2, vec_round, f64x2,
+        [0.6, 0.9],
+        [1.0, 1.0]
+    }
+
+    // FIXME(vector-enhancements-1)
+    //    test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4,
+    //        [],
+    //        []
+    //    }
+    test_vec_1! { test_vec_roundc_f64_1, vec_roundc, f64x2,
+        [0.1, 0.5],
+        [0.0, 0.0]
+    }
+    test_vec_1! { test_vec_roundc_f64_2, vec_roundc, f64x2,
+        [0.6, 0.9],
+        [1.0, 1.0]
+    }
+
+    // FIXME(vector-enhancements-1)
+    //    test_vec_1! { test_vec_rint_f32, vec_rint, f32x4,
+    //        [],
+    //        []
+    //    }
+    test_vec_1! { test_vec_rint_f64_1, vec_rint, f64x2,
+        [0.1, 0.5],
+        [0.0, 0.0]
+    }
+    test_vec_1! { test_vec_rint_f64_2, vec_rint, f64x2,
+        [0.6, 0.9],
+        [1.0, 1.0]
+    }
 }

From f72a162665314dfd75b0354d5a11ca3d91d82805 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 22 Jan 2025 17:06:52 +0100
Subject: [PATCH 2/5] impl `VectorMax/Min` for `vector_float/double`

These implementations work with just the `vector` target feature, but they only get a dedicated instruction with `vector-enhancements-1`.
---
 crates/core_arch/src/s390x/macros.rs |  7 +++++++
 crates/core_arch/src/s390x/vector.rs | 14 ++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs
index 92faf387bd..4c11b1d00e 100644
--- a/crates/core_arch/src/s390x/macros.rs
+++ b/crates/core_arch/src/s390x/macros.rs
@@ -2,6 +2,13 @@
 #![allow(unused_imports)] // FIXME remove when more tests are added
 
 macro_rules! test_impl {
+    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, _]) => {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        pub unsafe fn $fun ($($v : $ty),*) -> $r {
+            $call ($($v),*)
+        }
+    };
     ($fun:ident +($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => {
         #[inline]
         #[target_feature(enable = "vector")]
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 420a8176bb..808196dd07 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -330,6 +330,13 @@ mod sealed {
 
     impl_vec_trait! { [VectorMax vec_max] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) }
 
+    // FIXME(vector-enhancements-1) test for the `vfmaxsb` etc. instruction
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, _] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, _] }
+
+    impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
+    impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorMin<Other> {
         type Result;
@@ -348,6 +355,13 @@ mod sealed {
 
     impl_vec_trait! { [VectorMin vec_min] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) }
 
+    // FIXME(vector-enhancements-1) test for the `vfminsb` etc. instruction
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, _] }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, _] }
+
+    impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
+    impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorAbs {
         unsafe fn vec_abs(self) -> Self;

From 16693f85c289a4ecc8859d8d575a3a4d43c75a3a Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 22 Jan 2025 20:10:29 +0100
Subject: [PATCH 3/5] simplify the `vec_abs` implementation

---
 crates/core_arch/src/s390x/vector.rs | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 808196dd07..61a17e5611 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -384,21 +384,9 @@ mod sealed {
     impl_abs! { vec_abs_i32, i32x4 }
     impl_abs! { vec_abs_i64, i64x2 }
 
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vec_abs_f32(v: vector_float) -> vector_float {
-        let v: u32x4 = transmute(v);
-
-        transmute(simd_and(v, u32x4::splat(0x7FFFFFFF)))
-    }
-
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vec_abs_f64(v: vector_double) -> vector_double {
-        let v: u64x2 = transmute(v);
-
-        transmute(simd_and(v, u64x2::splat(0x7FFFFFFF_FFFFFFFF)))
-    }
+    // FIXME(vector-enhancements-1)
+    test_impl! { vec_abs_f32 (v: vector_float) -> vector_float [ simd_fabs, _ ] }
+    test_impl! { vec_abs_f64 (v: vector_double) -> vector_double [ simd_fabs, vflpdb ] }
 
     impl_vec_trait! { [VectorAbs vec_abs] vec_abs_f32 (vector_float) }
     impl_vec_trait! { [VectorAbs vec_abs] vec_abs_f64 (vector_double) }

From b2aac897278b061e3df8c7b702d0ef74d841eff2 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 13 Feb 2025 18:56:24 +0100
Subject: [PATCH 4/5] use generic llvm intrinsics for rounding

---
 crates/core_arch/src/s390x/vector.rs | 170 +++++++++++++++------------
 1 file changed, 94 insertions(+), 76 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 61a17e5611..abdab951c2 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -74,9 +74,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.umin.v4i32"] fn vmnlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
     #[link_name = "llvm.umin.v2i64"] fn vmnlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
 
-    #[link_name = "llvm.s390.vfisb"] fn vfisb(a: vector_float, b: i32, c: i32) -> vector_float;
-    #[link_name = "llvm.s390.vfidb"] fn vfidb(a: vector_double, b: i32, c: i32) -> vector_double;
+    #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float;
+    #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double;
 
+    #[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float;
+    #[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> vector_double;
+
+    #[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float;
+    #[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -636,70 +641,67 @@ mod sealed {
 
     impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
 
+    // FIXME(vector-enhancements-1) add instr tests for f32
+    test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, _] }
+    test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
+
+    // FIXME(llvm) roundeven does not yet lower to vfidb (but should in the future)
+    test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
+    test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
+
+    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, _] }
+    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
-    pub trait VectorRound: Sized {
-        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self;
+    pub trait VectorRoundc {
+        unsafe fn vec_roundc(self) -> Self;
+    }
 
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_roundc(self) -> Self {
-            self.vec_round_impl::<4, 0>()
-        }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorRound {
+        unsafe fn vec_round(self) -> Self;
+    }
 
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_round(self) -> Self {
-            // NOTE: simd_round resoles ties by rounding away from zero,
-            // while the vec_round function rounds towards zero
-            self.vec_round_impl::<4, 4>()
-        }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorRint {
+        unsafe fn vec_rint(self) -> Self;
+    }
 
-        // NOTE: vec_roundz (vec_round_impl::<4, 5>) is the same as vec_trunc
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_trunc(self) -> Self {
-            simd_trunc(self)
-        }
+    impl_vec_trait! { [VectorRoundc vec_roundc] vec_roundc_f32 (vector_float) }
+    impl_vec_trait! { [VectorRoundc vec_roundc] vec_roundc_f64 (vector_double) }
 
-        // NOTE: vec_roundp (vec_round_impl::<4, 6>) is the same as vec_ceil
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_ceil(self) -> Self {
-            simd_ceil(self)
-        }
+    impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) }
+    impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) }
 
-        // NOTE: vec_roundm (vec_round_impl::<4, 7>) is the same as vec_floor
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_floor(self) -> Self {
-            simd_floor(self)
-        }
+    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) }
+    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) }
 
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_rint(self) -> Self {
-            self.vec_round_impl::<0, 0>()
-        }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorTrunc {
+        // same as vec_roundz
+        unsafe fn vec_trunc(self) -> Self;
     }
 
-    // FIXME(vector-enhancements-1) apply the right target feature to all methods
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
-    impl VectorRound for vector_float {
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
-            vfisb(self, N, MODE)
-        }
+    pub trait VectorCeil {
+        // same as vec_roundp
+        unsafe fn vec_ceil(self) -> Self;
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
-    impl VectorRound for vector_double {
-        #[inline]
-        #[target_feature(enable = "vector")]
-        unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
-            vfidb(self, N, MODE)
-        }
+    pub trait VectorFloor {
+        // same as vec_roundm
+        unsafe fn vec_floor(self) -> Self;
     }
+
+    impl_vec_trait! { [VectorTrunc vec_trunc] simd_trunc (vector_float) }
+    impl_vec_trait! { [VectorTrunc vec_trunc] simd_trunc (vector_double) }
+
+    impl_vec_trait! { [VectorCeil vec_ceil] simd_ceil (vector_float) }
+    impl_vec_trait! { [VectorCeil vec_ceil] simd_ceil (vector_double) }
+
+    impl_vec_trait! { [VectorFloor vec_floor] simd_floor (vector_float) }
+    impl_vec_trait! { [VectorFloor vec_floor] simd_floor (vector_double) }
 }
 
 /// Vector element-wise addition.
@@ -920,7 +922,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_floor<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorFloor,
 {
     a.vec_floor()
 }
@@ -931,7 +933,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_ceil<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorCeil,
 {
     a.vec_ceil()
 }
@@ -943,12 +945,13 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_trunc<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorTrunc,
 {
     a.vec_trunc()
 }
 
-/// Vector round, resolves ties by rounding towards zero.
+/// Returns a vector containing the values of the corresponding elements of the given vector, rounded to
+/// the nearest representable floating-point integer using IEEE round-to-nearest (ties-to-even) rounding.
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -966,7 +969,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_roundc<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorRoundc,
 {
     a.vec_roundc()
 }
@@ -978,7 +981,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_roundm<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorFloor,
 {
     // the IBM docs note
     //
@@ -995,7 +998,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_roundp<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorCeil,
 {
     // the IBM docs note
     //
@@ -1012,7 +1015,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_roundz<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorTrunc,
 {
     // the IBM docs note
     //
@@ -1028,7 +1031,7 @@ where
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_rint<T>(a: T) -> T
 where
-    T: sealed::VectorRound,
+    T: sealed::VectorRint,
 {
     a.vec_rint()
 }
@@ -1304,11 +1307,16 @@ mod tests {
         [1.0, 1.0]
     }
 
-    // FIXME(vector-enhancements-1)
-    //    test_vec_1! { test_vec_round_f32, vec_round, f32x4,
-    //        [],
-    //        []
-    //    }
+    test_vec_1! { test_vec_round_f32, vec_round, f32x4,
+        [0.1, 0.5, 0.6, 0.9],
+        [0.0, 0.0, 1.0, 1.0]
+    }
+
+    test_vec_1! { test_vec_round_f32_even_odd, vec_round, f32x4,
+        [0.5, 1.5, 2.5, 3.5],
+        [0.0, 2.0, 2.0, 4.0]
+    }
+
     test_vec_1! { test_vec_round_f64_1, vec_round, f64x2,
         [0.1, 0.5],
         [0.0, 0.0]
@@ -1318,11 +1326,16 @@ mod tests {
         [1.0, 1.0]
     }
 
-    // FIXME(vector-enhancements-1)
-    //    test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4,
-    //        [],
-    //        []
-    //    }
+    test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4,
+        [0.1, 0.5, 0.6, 0.9],
+        [0.0, 0.0, 1.0, 1.0]
+    }
+
+    test_vec_1! { test_vec_roundc_f32_even_odd, vec_roundc, f32x4,
+        [0.5, 1.5, 2.5, 3.5],
+        [0.0, 2.0, 2.0, 4.0]
+    }
+
     test_vec_1! { test_vec_roundc_f64_1, vec_roundc, f64x2,
         [0.1, 0.5],
         [0.0, 0.0]
@@ -1332,11 +1345,16 @@ mod tests {
         [1.0, 1.0]
     }
 
-    // FIXME(vector-enhancements-1)
-    //    test_vec_1! { test_vec_rint_f32, vec_rint, f32x4,
-    //        [],
-    //        []
-    //    }
+    test_vec_1! { test_vec_rint_f32, vec_rint, f32x4,
+        [0.1, 0.5, 0.6, 0.9],
+        [0.0, 0.0, 1.0, 1.0]
+    }
+
+    test_vec_1! { test_vec_rint_f32_even_odd, vec_rint, f32x4,
+        [0.5, 1.5, 2.5, 3.5],
+        [0.0, 2.0, 2.0, 4.0]
+    }
+
     test_vec_1! { test_vec_rint_f64_1, vec_rint, f64x2,
         [0.1, 0.5],
         [0.0, 0.0]

From b824fabddadc1fdaa986e730a74d0c09ee52d9a7 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 22 Feb 2025 15:51:30 +0100
Subject: [PATCH 5/5] start using the newly added target features for testing

---
 ci/run.sh                            |  4 +++
 crates/core_arch/src/s390x/macros.rs | 16 ++++++++++
 crates/core_arch/src/s390x/vector.rs | 47 ++++++++++++++--------------
 3 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 28d53c5375..da64602ce8 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -124,6 +124,10 @@ case ${TARGET} in
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
         cargo_test "${PROFILE}"
 	      ;;
+    s390x*)
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1"
+        cargo_test "${PROFILE}"
+	      ;;
     powerpc64*)
         # We don't build the ppc 32-bit targets with these - these targets
         # are mostly unsupported for now.
diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs
index 4c11b1d00e..fd260bbc74 100644
--- a/crates/core_arch/src/s390x/macros.rs
+++ b/crates/core_arch/src/s390x/macros.rs
@@ -17,6 +17,22 @@ macro_rules! test_impl {
             transmute($call ($($v),*))
         }
     };
+    ($fun:ident +($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $tf:literal $instr:ident]) => {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        #[cfg_attr(all(test, target_feature = $tf), assert_instr($instr))]
+        pub unsafe fn $fun ($($v : $ty),*) -> $r {
+            transmute($call ($($v),*))
+        }
+    };
+    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $tf:literal $instr:ident]) => {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        #[cfg_attr(all(test, target_feature = $tf), assert_instr($instr))]
+        pub unsafe fn $fun ($($v : $ty),*) -> $r {
+            $call ($($v),*)
+        }
+    };
     ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => {
         #[inline]
         #[target_feature(enable = "vector")]
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index abdab951c2..1b59a4c474 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -163,8 +163,7 @@ mod sealed {
 
         #[inline]
         #[target_feature(enable = "vector")]
-        // FIXME: "vfasb" is part of vector enhancements 1, add a test for it when possible
-        // #[cfg_attr(test, assert_instr(vfasb))]
+        #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vfasb))]
         pub unsafe fn va_float(a: vector_float, b: vector_float) -> vector_float {
             transmute(simd_add(a, b))
         }
@@ -247,8 +246,7 @@ mod sealed {
 
         #[inline]
         #[target_feature(enable = "vector")]
-        // FIXME: "vfssb" is part of vector enhancements 1, add a test for it when possible
-        // #[cfg_attr(test, assert_instr(vfasb))]
+        #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vfssb))]
         pub unsafe fn vs_float(a: vector_float, b: vector_float) -> vector_float {
             transmute(simd_sub(a, b))
         }
@@ -335,9 +333,8 @@ mod sealed {
 
     impl_vec_trait! { [VectorMax vec_max] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) }
 
-    // FIXME(vector-enhancements-1) test for the `vfmaxsb` etc. instruction
-    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, _] }
-    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, _] }
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] }
 
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -360,9 +357,8 @@ mod sealed {
 
     impl_vec_trait! { [VectorMin vec_min] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) }
 
-    // FIXME(vector-enhancements-1) test for the `vfminsb` etc. instruction
-    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, _] }
-    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, _] }
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb]  }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb]  }
 
     impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);
@@ -389,8 +385,7 @@ mod sealed {
     impl_abs! { vec_abs_i32, i32x4 }
     impl_abs! { vec_abs_i64, i64x2 }
 
-    // FIXME(vector-enhancements-1)
-    test_impl! { vec_abs_f32 (v: vector_float) -> vector_float [ simd_fabs, _ ] }
+    test_impl! { vec_abs_f32 (v: vector_float) -> vector_float [ simd_fabs, "vector-enhancements-1" vflpsb ] }
     test_impl! { vec_abs_f64 (v: vector_double) -> vector_double [ simd_fabs, vflpdb ] }
 
     impl_vec_trait! { [VectorAbs vec_abs] vec_abs_f32 (vector_float) }
@@ -527,10 +522,15 @@ mod sealed {
     test_impl! { vec_ctzf_unsigned +(a: vector_unsigned_int) -> vector_unsigned_int [simd_cttz, vctzf] }
     test_impl! { vec_ctzg_unsigned +(a: vector_unsigned_long_long) -> vector_unsigned_long_long [simd_cttz, vctzg] }
 
-    // FIXME(vector-enhancements-1) other integer types are emulated, but get their own
-    // instructions in later facilities. Add tests when possible.
-    test_impl! { vec_popcnt_signed +(a: vector_signed_char) -> vector_signed_char [simd_ctpop, vpopctb] }
-    test_impl! { vec_popcnt_unsigned +(a: vector_unsigned_char) -> vector_unsigned_char [simd_ctpop, vpopctb] }
+    test_impl! { vec_vpopctb_signed +(a: vector_signed_char) -> vector_signed_char [simd_ctpop, vpopctb] }
+    test_impl! { vec_vpopcth_signed +(a: vector_signed_short) -> vector_signed_short [simd_ctpop, "vector-enhancements-1" vpopcth] }
+    test_impl! { vec_vpopctf_signed +(a: vector_signed_int) -> vector_signed_int [simd_ctpop, "vector-enhancements-1" vpopctf] }
+    test_impl! { vec_vpopctg_signed +(a: vector_signed_long_long) -> vector_signed_long_long [simd_ctpop, "vector-enhancements-1" vpopctg] }
+
+    test_impl! { vec_vpopctb_unsigned +(a: vector_unsigned_char) -> vector_unsigned_char [simd_ctpop, vpopctb] }
+    test_impl! { vec_vpopcth_unsigned +(a: vector_unsigned_short) -> vector_unsigned_short [simd_ctpop, "vector-enhancements-1" vpopcth] }
+    test_impl! { vec_vpopctf_unsigned +(a: vector_unsigned_int) -> vector_unsigned_int [simd_ctpop, "vector-enhancements-1" vpopctf] }
+    test_impl! { vec_vpopctg_unsigned +(a: vector_unsigned_long_long) -> vector_unsigned_long_long [simd_ctpop, "vector-enhancements-1" vpopctg] }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorAnd<Other> {
@@ -558,7 +558,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(vector-enhancements-1) #[cfg_attr(test, assert_instr(vno))]
+    #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vno))]
     unsafe fn nor(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char {
         let a: u8x16 = transmute(a);
         let b: u8x16 = transmute(b);
@@ -575,7 +575,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(vector-enhancements-1) #[cfg_attr(test, assert_instr(vnn))]
+    #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnn))]
     unsafe fn nand(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char {
         let a: u8x16 = transmute(a);
         let b: u8x16 = transmute(b);
@@ -592,7 +592,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(vector-enhancements-1) #[cfg_attr(test, assert_instr(vnx))]
+    #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnx))]
     unsafe fn eqv(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char {
         let a: u8x16 = transmute(a);
         let b: u8x16 = transmute(b);
@@ -609,7 +609,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(vector-enhancements-1) #[cfg_attr(test, assert_instr(vnc))]
+    #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vnc))]
     unsafe fn andc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char {
         let a = transmute(a);
         let b = transmute(b);
@@ -626,7 +626,7 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "vector")]
-    // FIXME(vector-enhancements-1) #[cfg_attr(test, assert_instr(voc))]
+    #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(voc))]
     unsafe fn orc(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char {
         let a = transmute(a);
         let b = transmute(b);
@@ -641,15 +641,14 @@ mod sealed {
 
     impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
 
-    // FIXME(vector-enhancements-1) add instr tests for f32
-    test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, _] }
+    test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] }
     test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
 
     // FIXME(llvm) roundeven does not yet lower to vfidb (but should in the future)
     test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
     test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
 
-    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, _] }
+    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] }
     test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]