//! AArch64 targets have two possible implementations for atomics:
//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower.
//! 2. Large System Extensions (LSE), newer and faster.
//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
//! where atomic operations call into the compiler runtime to dispatch between the two depending on
//! which is supported on the current CPU.
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
//!
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
//! Use the `compiler-rt` intrinsics if you want LSE support.
//!
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
//!
//! Generate functions for each of the following symbols:
//!  __aarch64_casM_ORDER
//!  __aarch64_swpN_ORDER
//!  __aarch64_ldaddN_ORDER
//!  __aarch64_ldclrN_ORDER
//!  __aarch64_ldeorN_ORDER
//!  __aarch64_ldsetN_ORDER
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, and ORDER = {relax, acq, rel, acq_rel}.
//!
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
//! We do something similar, but with macro arguments.
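//!
//! For example, following the naming scheme above, `__aarch64_cas4_acq_rel` is a 4-byte
//! compare-and-swap with acquire-release ordering, and `__aarch64_ldadd8_relax` is an
//! 8-byte fetch-and-add with relaxed ordering.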
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule

// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.

/// Translate a byte size to a Rust type.
#[rustfmt::skip]
macro_rules! int_ty {
    (1) => { i8 };
    (2) => { i16 };
    (4) => { i32 };
    (8) => { i64 };
    (16) => { i128 };
}

/// Given a byte size and a register number, return a register of the appropriate size.
///
/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
#[rustfmt::skip]
macro_rules! reg {
    (1, $num:literal) => { concat!("w", $num) };
    (2, $num:literal) => { concat!("w", $num) };
    (4, $num:literal) => { concat!("w", $num) };
    (8, $num:literal) => { concat!("x", $num) };
}
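// Sizes 1, 2, and 4 all use 32-bit `w` registers: AArch64 has no narrower
// general-purpose register names, and the byte/halfword load and store variants
// operate on the low bits of a `w` register.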

/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! acquire {
    (Relaxed) => { "" };
    (Acquire) => { "a" };
    (Release) => { "" };
    (AcqRel) => { "a" };
}

/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! release {
    (Relaxed) => { "" };
    (Acquire) => { "" };
    (Release) => { "l" };
    (AcqRel) => { "l" };
}
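// Exclusive loads take an optional `a` (acquire) suffix and exclusive stores an
// optional `l` (release) suffix, e.g. `ldaxr`/`stlxr` for an acquire-release pair.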

/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! size {
    (1) => { "b" };
    (2) => { "h" };
    (4) => { "" };
    (8) => { "" };
    (16) => { "" };
}
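// 4- and 8-byte accesses are distinguished by the register width (`w` vs. `x`)
// rather than by a mnemonic suffix, and 16-byte accesses use the pair instructions
// below, so those sizes get an empty suffix.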

/// Given a byte size, translate it to an Unsigned eXTend instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
#[rustfmt::skip]
macro_rules! uxt {
    (1) => { "uxtb" };
    (2) => { "uxth" };
    ($_:tt) => { "mov" };
}
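// The zero-extension matters for the CAS loop below: `ldxrb`/`ldxrh` zero-extend the
// loaded value into a full `w` register, so the expected value must be zero-extended
// too before the `cmp`. For 4- and 8-byte sizes a plain `mov` is enough.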

/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
macro_rules! ldxr {
    ($ordering:ident, $bytes:tt) => {
        concat!("ld", acquire!($ordering), "xr", size!($bytes))
    };
}
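// For example, `ldxr!(Acquire, 1)` evaluates to `"ldaxrb"`.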

/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
macro_rules! stxr {
    ($ordering:ident, $bytes:tt) => {
        concat!("st", release!($ordering), "xr", size!($bytes))
    };
}
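// For example, `stxr!(Release, 2)` evaluates to `"stlxrh"`.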

/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
macro_rules! ldxp {
    ($ordering:ident) => {
        concat!("ld", acquire!($ordering), "xp")
    };
}
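// For example, `ldxp!(AcqRel)` evaluates to `"ldaxp"`.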

/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
macro_rules! stxp {
    ($ordering:ident) => {
        concat!("st", release!($ordering), "xp")
    };
}
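// For example, `stxp!(AcqRel)` evaluates to `"stlxp"`.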

/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
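/// Stores `desired` at `ptr` only if the current value equals `expected`, and always
/// returns the value that was previously at `ptr`.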
macro_rules! compare_and_swap {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $name (
                expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                unsafe { core::arch::asm! {
                    // UXT s(tmp0), s(0)
                    concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x2]
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"),
                    // cmp s(0), s(tmp0)
                    concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
                    "bne 1f",
                    // STXR w(tmp1), s(1), [x2]
                    concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"),
                    "cbnz w17, 0b",
                    "1:",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}
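// Roughly, `compare_and_swap!(Acquire, 4, __aarch64_cas4_acq)` expands to a naked function
// whose body is this LL/SC retry loop (hand-expanded here for illustration):
//
//     mov   w16, w0         // stash the expected value
// 0:  ldaxr w0, [x2]        // load-exclusive the current value (also the return value)
//     cmp   w0, w16
//     bne   1f              // mismatch: return the current value unchanged
//     stxr  w17, w1, [x2]   // try to store `desired`
//     cbnz  w17, 0b         // the exclusive store failed, so retry
// 1:  ret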

// i128 uses a completely different impl, so it has its own macro.
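// A 16-byte CAS works on register pairs: `expected` arrives in x0:x1, `desired` in x2:x3,
// and `ptr` in x4. The `cmp`/`ccmp` pair compares both 64-bit halves before attempting
// the exclusive store.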
macro_rules! compare_and_swap_i128 {
    ($ordering:ident, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $name (
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                unsafe { core::arch::asm! {
                    "mov x16, x0",
                    "mov x17, x1",
                    "0:",
                    // LDXP x0, x1, [x4]
                    concat!(ldxp!($ordering), " x0, x1, [x4]"),
                    "cmp x0, x16",
                    "ccmp x1, x17, #0, eq",
                    "bne 1f",
                    // STXP w(tmp2), x2, x3, [x4]
                    concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
                    "cbnz w15, 0b",
                    "1:",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}

/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
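/// Stores `left` at `right_ptr` and returns the value that was previously there.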
macro_rules! swap {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $name (
                left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                unsafe { core::arch::asm! {
                    // mov s(tmp0), s(0)
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x1]
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
                    // STXR w(tmp1), s(tmp0), [x1]
                    concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                    "cbnz w17, 0b",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}

/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
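/// Each generated function atomically applies `$op` to `*ptr` and `val`, and returns the
/// value that was previously at `ptr` (fetch-and-op semantics).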
macro_rules! fetch_op {
    ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $name (
                val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                unsafe { core::arch::asm! {
                    // mov s(tmp0), s(0)
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x1]
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
                    // OP s(tmp1), s(0), s(tmp0)
                    concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
                    // STXR w(tmp2), s(tmp1), [x1]
                    concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                    "cbnz w15, 0b",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    }
}

// We need a single macro to pass to `foreach_ldadd`.
macro_rules! add {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "add" }
    };
}

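// `ldclr` clears the bits that are set in the operand (`old & !val`), which on aarch64
// is the `bic` (bit clear) instruction rather than `and`.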
macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "bic" }
    };
}

macro_rules! xor {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "eor" }
    };
}

macro_rules! or {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "orr" }
    };
}

// See `generate_aarch64_outlined_atomics` in build.rs.
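// Each generated `foreach_*` macro invokes the given macro once per supported
// (size, ordering) combination, passing the matching `__aarch64_*` symbol name.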
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
foreach_cas!(compare_and_swap);
foreach_cas16!(compare_and_swap_i128);
foreach_swp!(swap);
foreach_ldadd!(add);
foreach_ldclr!(and);
foreach_ldeor!(xor);
foreach_ldset!(or);