Skip to content

Commit 016eff9

Browse files
authored
Add vcvtq_u32_f32 and vcvtq_s32_f32 (#902)
1 parent 851aa9f commit 016eff9

File tree

2 files changed

+99
-0
lines changed

2 files changed

+99
-0
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

+56
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ extern "C" {
285285
b3: int8x16_t,
286286
c: uint8x16_t,
287287
) -> int8x16_t;
288+
289+
#[link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32"]
290+
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
291+
#[link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32"]
292+
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
288293
}
289294

290295
/// Absolute Value (wrapping).
@@ -1838,6 +1843,21 @@ pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
18381843
))
18391844
}
18401845

1846+
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
///
/// Forwards `a` unchanged to the `vcvtq_s32_f32_` binding and returns its
/// result. The tests in this module expect lanes that are too large or too
/// small for the destination to come back as `0x7fffffff` / `-0x80000000`.
#[inline]
1847+
#[target_feature(enable = "neon")]
1848+
#[cfg_attr(test, assert_instr(fcvtzs))]
1849+
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
1850+
vcvtq_s32_f32_(a)
1851+
}
1852+
1853+
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
///
/// Forwards `a` unchanged to the `vcvtq_u32_f32_` binding and returns its
/// result. The tests in this module expect a negative lane to come back as
/// `0` and a lane that is too large for the destination as `0xffffffff`.
1854+
#[inline]
1855+
#[target_feature(enable = "neon")]
1856+
#[cfg_attr(test, assert_instr(fcvtzu))]
1857+
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
1858+
vcvtq_u32_f32_(a)
1859+
}
1860+
18411861
#[cfg(test)]
18421862
mod tests {
18431863
use crate::core_arch::aarch64::test_support::*;
@@ -1846,6 +1866,42 @@ mod tests {
18461866
use std::mem::transmute;
18471867
use stdarch_test::simd_test;
18481868

1869+
// Checks `vcvtq_s32_f32` on exactly representable lanes and on lanes far
// outside the `i32` range. Only lane 0 varies between the three cases.
#[simd_test(enable = "neon")]
1870+
unsafe fn test_vcvtq_s32_f32() {
1871+
// Case 1: whole-number inputs, including a negative one, convert exactly.
let f = f32x4::new(-1., 2., 3., 4.);
1872+
let e = i32x4::new(-1, 2, 3, 4);
1873+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1874+
assert_eq!(r, e);
1875+
1876+
// Case 2: a lane far above the i32 range is expected to yield 0x7fffffff.
let f = f32x4::new(10e37, 2., 3., 4.);
1877+
let e = i32x4::new(0x7fffffff, 2, 3, 4);
1878+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1879+
assert_eq!(r, e);
1880+
1881+
// Case 3: a lane far below the i32 range is expected to yield -0x80000000.
let f = f32x4::new(-10e37, 2., 3., 4.);
1882+
let e = i32x4::new(-0x80000000, 2, 3, 4);
1883+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1884+
assert_eq!(r, e);
1885+
}
1886+
1887+
// Checks `vcvtq_u32_f32` on exactly representable lanes, on a negative lane,
// and on a lane far above the `u32` range. Only lane 0 varies between cases.
#[simd_test(enable = "neon")]
1888+
unsafe fn test_vcvtq_u32_f32() {
1889+
// Case 1: non-negative whole-number inputs convert exactly.
let f = f32x4::new(1., 2., 3., 4.);
1890+
let e = u32x4::new(1, 2, 3, 4);
1891+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1892+
assert_eq!(r, e);
1893+
1894+
// Case 2: a negative lane is expected to yield 0.
let f = f32x4::new(-1., 2., 3., 4.);
1895+
let e = u32x4::new(0, 2, 3, 4);
1896+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1897+
assert_eq!(r, e);
1898+
1899+
// Case 3: a lane far above the u32 range is expected to yield 0xffffffff.
let f = f32x4::new(10e37, 2., 3., 4.);
1900+
let e = u32x4::new(0xffffffff, 2, 3, 4);
1901+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1902+
assert_eq!(r, e);
1903+
}
1904+
18491905
#[simd_test(enable = "neon")]
18501906
unsafe fn test_vld1q_f32() {
18511907
let e = f32x4::new(1., 2., 3., 4.);

crates/core_arch/src/arm/neon/mod.rs

+43
Original file line numberDiff line numberDiff line change
@@ -1811,6 +1811,31 @@ pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
18111811
transmute(f32x4::new(v, v, v, v))
18121812
}
18131813

1814+
// These float-to-int implementations have undefined behaviour when `a` overflows
1815+
// the destination type. Clang has the same problem: https://llvm.org/PR47510
1816+
1817+
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
///
/// Only compiled for `target_arch = "arm"`. Reinterprets `a` as an `f32x4`,
/// converts it lane-wise to `i32x4` with `simd_cast`, and reinterprets the
/// result as `int32x4_t`.
///
/// # Safety
///
/// As noted in the comment accompanying these conversions in this file, the
/// behaviour is undefined when a lane of `a` overflows the destination type
/// (see <https://llvm.org/PR47510>).
1818+
#[inline]
1819+
#[cfg(target_arch = "arm")]
1820+
#[target_feature(enable = "neon")]
1821+
#[target_feature(enable = "v7")]
1822+
#[cfg_attr(test, assert_instr("vcvt.s32.f32"))]
1823+
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
1824+
// Local import of the portable vector types used for the cast.
use crate::core_arch::simd::{f32x4, i32x4};
1825+
transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a)))
1826+
}
1827+
1828+
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
///
/// Only compiled for `target_arch = "arm"`. Reinterprets `a` as an `f32x4`,
/// converts it lane-wise to `u32x4` with `simd_cast`, and reinterprets the
/// result as `uint32x4_t`.
///
/// # Safety
///
/// As noted in the comment accompanying these conversions in this file, the
/// behaviour is undefined when a lane of `a` overflows the destination type
/// (see <https://llvm.org/PR47510>).
1829+
#[inline]
1830+
#[cfg(target_arch = "arm")]
1831+
#[target_feature(enable = "neon")]
1832+
#[target_feature(enable = "v7")]
1833+
#[cfg_attr(test, assert_instr("vcvt.u32.f32"))]
1834+
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
1835+
// Local import of the portable vector types used for the cast.
use crate::core_arch::simd::{f32x4, u32x4};
1836+
transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a)))
1837+
}
1838+
18141839
#[cfg(test)]
18151840
mod tests {
18161841
use super::*;
@@ -1876,6 +1901,24 @@ mod tests {
18761901
assert_eq!(r, e);
18771902
}
18781903

1904+
// Checks `vcvtq_s32_f32` on whole-number lanes only (one of them negative);
// no out-of-range input is exercised here. Compiled for `arm` targets only,
// matching the `cfg` on the function under test.
#[cfg(target_arch = "arm")]
1905+
#[simd_test(enable = "neon")]
1906+
unsafe fn test_vcvtq_s32_f32() {
1907+
let f = f32x4::new(-1., 2., 3., 4.);
1908+
let e = i32x4::new(-1, 2, 3, 4);
1909+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1910+
assert_eq!(r, e);
1911+
}
1912+
1913+
// Checks `vcvtq_u32_f32` on non-negative whole-number lanes only; no negative
// or out-of-range input is exercised here. Compiled for `arm` targets only,
// matching the `cfg` on the function under test.
#[cfg(target_arch = "arm")]
1914+
#[simd_test(enable = "neon")]
1915+
unsafe fn test_vcvtq_u32_f32() {
1916+
let f = f32x4::new(1., 2., 3., 4.);
1917+
let e = u32x4::new(1, 2, 3, 4);
1918+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1919+
assert_eq!(r, e);
1920+
}
1921+
18791922
#[simd_test(enable = "neon")]
18801923
unsafe fn test_vget_lane_u8() {
18811924
let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

0 commit comments

Comments
 (0)