Commit 1fd36a5
copy_misaligned_words: use inline asm on ARM, simplify fallback implementation
1 parent 4df7a8d

2 files changed: +56 −55

Diff for: compiler-builtins/src/lib.rs

+1

@@ -8,6 +8,7 @@
 #![feature(linkage)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
+#![feature(cfg_match)]
 #![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![no_builtins]
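
The new feature gate is needed because the updated `mem/impls.rs` below selects an implementation at compile time with `core::cfg_match!`. As a minimal nightly-only sketch (not part of this commit) of how that macro behaves, mirroring the arm syntax used in the diff below; the function name and strings are illustrative only:

#![feature(cfg_match)]

// Hypothetical example: only the arm whose cfg predicate matches the
// compilation target is compiled; `_` is the fallback arm.
fn target_kind() -> &'static str {
    let kind;
    core::cfg_match! {
        any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec") => {
            kind = "ARM-family target";
        }
        _ => {
            kind = "other target";
        }
    }
    kind
}

fn main() {
    println!("{}", target_kind());
}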

Diff for: compiler-builtins/src/mem/impls.rs

+55 −55

@@ -41,84 +41,84 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
-/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
-/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
-/// chunk size if a load happened.
-#[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
-unsafe fn load_chunk_aligned<T: Copy>(
-    src: *const usize,
-    dst: *mut usize,
-    load_sz: usize,
-    offset: usize,
-) -> usize {
-    let chunk_sz = core::mem::size_of::<T>();
-    if (load_sz & chunk_sz) != 0 {
-        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
-        offset | chunk_sz
-    } else {
-        offset
+unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+    let dest_end = dest.wrapping_add(n);
+    while dest < dest_end {
+        *dest = *src;
+        dest = dest.wrapping_add(1);
+        src = src.wrapping_add(1);
     }
 }
 
-/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
-/// read with the out-of-bounds part filled with 0s.
-/// `load_sz` be strictly less than `WORD_SIZE`.
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+///
+/// The remaining bytes are filled non-deterministically.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            core::arch::asm!(
+                "ldr {out}, [{src}]",
+                src = in(reg) src,
+                out = lateout(reg) out,
+                options(nostack, readonly, preserves_flags),
+            );
+        }
+        _ => {
+            out = 0;
+            copy_forward_bytes(&raw mut out as *mut u8, src as *mut u8, load_sz);
+        }
+
+    }
 
-    let mut i = 0;
-    let mut out = 0usize;
-    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
-    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }
 
 /// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
-/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
-/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
-/// `load_sz` be strictly less than `WORD_SIZE`.
+/// `usize`-aligned. `load_sz` must be strictly less than `WORD_SIZE`.
+///
+/// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had
+/// done a `usize` read from `src`, with the out-of-bounds part filled non-deterministically.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            core::arch::asm!(
+                "ldr {out}, [{src}]",
+                src = in(reg) src,
+                out = lateout(reg) out,
+                options(nostack, readonly, preserves_flags),
+            );
+        }
+        _ => {
+            out = 0;
+            // Obtain pointers pointing to the beginning of the range we want to load.
+            let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+            let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+            copy_forward_bytes(out_shifted as *mut u8, src_shifted as *mut u8, load_sz);
+        }
+
+    }
 
-    let mut i = 0;
-    let mut out = 0usize;
-    // Obtain pointers pointing to the beginning of the range we want to load.
-    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
-    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
-    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
-    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }
 
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
-    #[inline(always)]
-    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
-        let dest_end = dest.wrapping_add(n);
-        while dest < dest_end {
-            *dest = *src;
-            dest = dest.wrapping_add(1);
-            src = src.wrapping_add(1);
-        }
-    }
-
     #[inline(always)]
     unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
         let mut dest_usize = dest as *mut usize;
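
For orientation, here is a small standalone sketch (not part of the commit) of what the portable fallback arms compute, rewritten in safe Rust over byte arrays. The function names and test values are illustrative only, and the final assertions assume a 64-bit little-endian target; unlike the real helpers, the demo zero-fills the bytes the real code leaves unspecified.

const WORD_SIZE: usize = core::mem::size_of::<usize>();

// Sketch of `load_aligned_partial`'s fallback arm: copy the first `load_sz`
// bytes (in memory order) of an aligned word into the first bytes of `out`.
fn load_partial_demo(word: usize, load_sz: usize) -> usize {
    assert!(load_sz < WORD_SIZE);
    let src = word.to_ne_bytes();
    let mut out = [0u8; WORD_SIZE];
    out[..load_sz].copy_from_slice(&src[..load_sz]);
    usize::from_ne_bytes(out)
}

// Sketch of `load_aligned_end_partial`'s fallback arm: copy the last `load_sz`
// bytes of the word into the last bytes of `out`.
fn load_end_partial_demo(word: usize, load_sz: usize) -> usize {
    assert!(load_sz < WORD_SIZE);
    let src = word.to_ne_bytes();
    let mut out = [0u8; WORD_SIZE];
    out[WORD_SIZE - load_sz..].copy_from_slice(&src[WORD_SIZE - load_sz..]);
    usize::from_ne_bytes(out)
}

fn main() {
    // On a 64-bit little-endian target, loading 3 bytes keeps the low 3 bytes
    // of the word, and the "end" variant keeps the high 3 bytes.
    let w: usize = 0x1122_3344_5566_7788;
    assert_eq!(load_partial_demo(w, 3), w & 0x00ff_ffff);
    assert_eq!(load_end_partial_demo(w, 3), w & 0xffff_ff00_0000_0000);
}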
