diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs index 8f22eb628..aeb3ff3e5 100644 --- a/compiler-builtins/src/arm_linux.rs +++ b/compiler-builtins/src/arm_linux.rs @@ -1,5 +1,6 @@ -use core::intrinsics; +use core::arch; use core::mem; +use core::sync::atomic::{AtomicU32, Ordering}; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt @@ -7,6 +8,7 @@ unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); f(oldval, newval, ptr) == 0 } + unsafe fn __kuser_memory_barrier() { let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); f(); @@ -54,13 +56,52 @@ fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { (aligned & !(mask << shift)) | ((val & mask) << shift) } +/// Performs a relaxed atomic load of 4 bytes at `ptr`. Some of the bytes are allowed to be out of +/// bounds as long as `size_of::()` bytes are in bounds. +/// +/// # Safety +/// +/// - `ptr` must be 4-aligned. +/// - `size_of::()` must be at most 4. +/// - if `size_of::() == 1`, `ptr` or `ptr` offset by 1, 2 or 3 bytes must be valid for a relaxed +/// atomic read of 1 byte. +/// - if `size_of::() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic +/// read of 2 bytes. +/// - if `size_of::() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes. +unsafe fn atomic_load_aligned(ptr: *mut u32) -> u32 { + if mem::size_of::() == 4 { + // SAFETY: As `T` has a size of 4, the caller garantees this is sound. + unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) } + } else { + // SAFETY: + // As all 4 bytes pointed to by `ptr` might not be dereferenceable due to being out of + // bounds when doing atomic operations on a `u8`/`i8`/`u16`/`i16`, inline ASM is used to + // avoid causing undefined behaviour. However, as `ptr` is 4-aligned and at least 1 byte of + // `ptr` is dereferencable, the load won't cause a segfault as the page size is always + // larger than 4 bytes. + // The `ldr` instruction does not touch the stack or flags, or write to memory, so + // `nostack`, `preserves_flags` and `readonly` are sound. The caller garantees that `ptr` is + // 4-aligned, as required by `ldr`. + unsafe { + let res: u32; + arch::asm!( + "ldr {res}, [{ptr}]", + ptr = in(reg) ptr, + res = lateout(reg) res, + options(nostack, preserves_flags, readonly) + ); + res + } + } +} + // Generic atomic read-modify-write operation unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 { let aligned_ptr = align_ptr(ptr); let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval_aligned = atomic_load_aligned::(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); let newval = f(curval); let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); @@ -76,7 +117,7 @@ unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { let (shift, mask) = get_shift_mask(ptr); loop { - let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval_aligned = atomic_load_aligned::(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); if curval != oldval { return curval;