Skip to content

fix - aarch64_be tests #1786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ jobs:
os: ubuntu-latest
- tuple: aarch64-unknown-linux-gnu
os: ubuntu-latest
- target:
tuple: aarch64_be-unknown-linux-gnu
os: ubuntu-latest
- tuple: riscv64gc-unknown-linux-gnu
os: ubuntu-latest
- tuple: powerpc-unknown-linux-gnu
Expand Down Expand Up @@ -125,6 +128,11 @@ jobs:
tuple: aarch64-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: aarch64_be-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
build_std: true
- target:
tuple: armv7-unknown-linux-gnueabihf
os: ubuntu-latest
Expand Down Expand Up @@ -193,13 +201,16 @@ jobs:

steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
- run: rustup target add ${{ matrix.target.tuple }}
if: matrix.build_std == ''
- run: |
rustup component add rust-src
echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
if: matrix.build_std != ''
- run: cargo generate-lockfile

# Configure some env vars based on matrix configuration
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"

ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/bin/aarch64-none-linux-gnu-objdump"
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
ENV STDARCH_TEST_SKIP_FEATURE=tme
1 change: 1 addition & 0 deletions ci/run-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ run() {
--env NOSTD \
--env NORUN \
--env RUSTFLAGS \
--env CARGO_UNSTABLE_BUILD_STD \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be used anywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be used by the CI. Thinking about it, it probably makes sense for this PR to also include the code that makes the CI run, seeing as that's why these changes are being made.

--volume "${HOME}/.cargo":/cargo \
--volume "$(rustc --print sysroot)":/rust:ro \
--volume "$(pwd)":/checkout:ro \
Expand Down
2 changes: 1 addition & 1 deletion ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ case "${TARGET}" in
--cppcompiler "${TEST_CXX_COMPILER}" \
--skip "${TEST_SKIP_INTRINSICS}" \
--target "${TARGET}" \
--linker "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER}" \
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
;;
*)
Expand Down
72 changes: 72 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24470,6 +24470,7 @@ pub fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
Expand All @@ -24480,6 +24481,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
Expand All @@ -24490,6 +24492,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
Expand All @@ -24500,6 +24503,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
Expand All @@ -24510,6 +24514,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
Expand All @@ -24520,12 +24525,79 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "little")]
#[cfg_attr(test, assert_instr(rsubhn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: uint32x2_t = vrsubhn_u64(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=3) is below the combined
    // lane count of the two 2-lane inputs.
    unsafe { simd_shuffle!(a, narrowed, [0, 1, 2, 3]) }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: int8x8_t = vrsubhn_s16(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=15) is below the combined
    // lane count of the two 8-lane inputs.
    unsafe {
        simd_shuffle!(
            a,
            narrowed,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        )
    }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: int16x4_t = vrsubhn_s32(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=7) is below the combined
    // lane count of the two 4-lane inputs.
    unsafe { simd_shuffle!(a, narrowed, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: int32x2_t = vrsubhn_s64(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=3) is below the combined
    // lane count of the two 2-lane inputs.
    unsafe { simd_shuffle!(a, narrowed, [0, 1, 2, 3]) }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: uint8x8_t = vrsubhn_u16(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=15) is below the combined
    // lane count of the two 8-lane inputs.
    unsafe {
        simd_shuffle!(
            a,
            narrowed,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        )
    }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: uint16x4_t = vrsubhn_u32(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=7) is below the combined
    // lane count of the two 4-lane inputs.
    unsafe { simd_shuffle!(a, narrowed, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
#[doc = "Rounding subtract returning high narrow"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_endian = "big")]
#[cfg_attr(test, assert_instr(rsubhn))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
    // Narrow `b` and `c` with the non-"high" intrinsic; its lanes are appended after `a`.
    let narrowed: uint32x2_t = vrsubhn_u64(b, c);
    // The identity index list keeps the lanes of `a` first, then the lanes of `narrowed`.
    // SAFETY: the indices are constant and each one (0..=3) is below the combined
    // lane count of the two 2-lane inputs.
    unsafe { simd_shuffle!(a, narrowed, [0, 1, 2, 3]) }
}
#[doc = "Insert vector element from another vector element"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
#[inline]
Expand Down
2 changes: 2 additions & 0 deletions crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ macro_rules! test_vtbl {
- table[$table_t:ident]: [$($table_v:expr),*] |
$(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|*
) => {
#[cfg(target_endian = "little")]
#[simd_test(enable = "neon")]
unsafe fn $test_name() {
// create table as array, and transmute it to
Expand Down Expand Up @@ -168,6 +169,7 @@ macro_rules! test_vtbx {
- ext[$ext_t:ident]: [$($ext_v:expr),*] |
$(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|*
) => {
#[cfg(target_endian = "little")]
#[simd_test(enable = "neon")]
unsafe fn $test_name() {
// create table as array, and transmute it to
Expand Down
2 changes: 1 addition & 1 deletion crates/simd-test-macro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub fn simd_test(
{
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
"arm" | "armv7" => "is_arm_feature_detected",
"aarch64" | "arm64ec" => "is_aarch64_feature_detected",
"aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected",
"powerpc" | "powerpcle" => "is_powerpc_feature_detected",
"powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected",
Expand Down
1 change: 1 addition & 0 deletions crates/std_detect/src/detect/os/linux/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ impl AtHwcap {
}
}

#[cfg(target_endian = "little")]
#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 2 additions & 0 deletions crates/std_detect/src/detect/os/linux/auxvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ mod tests {
assert_eq!(v.hwcap2, 0);
}
} else if #[cfg(target_arch = "aarch64")] {
#[cfg(target_endian = "little")]
#[test]
fn linux_artificial_aarch64() {
let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv");
Expand All @@ -298,6 +299,7 @@ mod tests {
assert_eq!(v.hwcap, 0x0123456789abcdef);
assert_eq!(v.hwcap2, 0x02468ace13579bdf);
}
#[cfg(target_endian = "little")]
#[test]
fn linux_no_hwcap2_aarch64() {
let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv");
Expand Down
32 changes: 32 additions & 0 deletions crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ neon-unstable-f16: &neon-unstable-f16
neon-unstable-feat-lut: &neon-unstable-feat-lut
FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]

# #[cfg(target_endian = "little")]
little-endian: &little-endian
FnCall: [cfg, ['target_endian = "little"']]

# #[cfg(target_endian = "big")]
big-endian: &big-endian
FnCall: [cfg, ['target_endian = "big"']]

intrinsics:
- name: "vaddd_{type}"
doc: Add
Expand Down Expand Up @@ -8906,6 +8914,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
return_type: "{neon_type[3]}"
attr:
- *little-endian
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
Expand All @@ -8923,6 +8932,29 @@ intrinsics:
- FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
- FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]

- name: "vrsubhn_high_{neon_type[1]}"
doc: "Rounding subtract returning high narrow"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
return_type: "{neon_type[3]}"
attr:
- *big-endian
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
- [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
- [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
- [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
- [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
- [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
- [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
compose:
- Let:
- x
- "{neon_type[0]}"
- FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
- FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]

- name: "vcopy{neon_type[0].lane_nox}"
doc: "Insert vector element from another vector element"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
Expand Down
10 changes: 9 additions & 1 deletion crates/stdarch-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,15 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
// Original limit was 20 instructions, but ARM DSP Intrinsics
// are exactly 20 instructions long. So, bump the limit to 22
// instead of adding here a long list of exceptions.
_ => 22,
_ => {
// aarch64_be may add reverse instructions, which increases
// the number of instructions generated.
if cfg!(all(target_endian = "big", target_arch = "aarch64")) {
32
} else {
22
}
}
},
|v| v.parse().unwrap(),
);
Expand Down
Loading