|
12 | 12 | //! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
|
13 | 13 | //!
|
14 | 14 | //! Generate functions for each of the following symbols:
|
| 15 | +//! __aarch64_casM_ORDER |
15 | 16 | //! __aarch64_swpN_ORDER
|
16 | 17 | //! __aarch64_ldaddN_ORDER
|
17 | 18 | //! __aarch64_ldclrN_ORDER
|
18 | 19 | //! __aarch64_ldeorN_ORDER
|
19 | 20 | //! __aarch64_ldsetN_ORDER
|
20 |
| -//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel } |
21 |
| -//! |
22 |
| -//! TODO: M = 16 |
| 21 | +//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel } |
23 | 22 | //!
|
24 | 23 | //! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
|
25 | 24 | //! We do something similar, but with macro arguments.
|
26 | 25 |
|
27 |
| -/// We don't do runtime dispatch so we don't have to worry about the global ctor. |
28 |
| -/// Apparently MacOS uses a different number of underscores in the symbol name (???) |
29 |
| -// #[cfg(target_vendor = "apple")] |
30 |
| -// macro_rules! have_lse { |
31 |
| -// () => { ___aarch64_have_lse_atomics } |
32 |
| -// } |
33 |
| - |
34 |
| -// #[cfg(not(target_vendor = "apple"))] |
35 |
| -// macro_rules! have_lse { |
36 |
| -// () => { __aarch64_have_lse_atomics } |
37 |
| -// } |
| 26 | +// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. |
38 | 27 |
|
39 | 28 | /// Translate a byte size to a Rust type.
|
40 | 29 | macro_rules! int_ty {
|
@@ -106,6 +95,22 @@ macro_rules! stxr {
|
106 | 95 | ($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) }
|
107 | 96 | }
|
108 | 97 |
|
/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// Only the ordering is taken: the mnemonic is built as `"ld"` + `acquire!($ordering)` + `"xp"`,
/// with no size suffix appended.
/// NOTE(review): `acquire!` is defined elsewhere in this file; presumably it yields `"a"` for the
/// acquiring orderings (giving `ldaxp`) and `""` otherwise (giving `ldxp`) -- confirm there.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
macro_rules! ldxp {
    ($ordering:ident) => {
        concat!("ld", acquire!($ordering), "xp")
    };
}
| 105 | + |
/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// Only the ordering is taken: the mnemonic is built as `"st"` + `release!($ordering)` + `"xp"`,
/// with no size suffix appended.
/// NOTE(review): `release!` is defined elsewhere in this file; presumably it yields `"l"` for the
/// releasing orderings (giving `stlxp`) and `""` otherwise (giving `stxp`) -- confirm there.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
macro_rules! stxp {
    ($ordering:ident) => {
        concat!("st", release!($ordering), "xp")
    };
}
| 113 | + |
109 | 114 | /// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
|
110 | 115 | macro_rules! compare_and_swap {
|
111 | 116 | ($ordering:ident, $bytes:tt, $name:ident) => {
|
@@ -137,6 +142,36 @@ macro_rules! compare_and_swap {
|
137 | 142 | }
|
138 | 143 | }
|
139 | 144 |
|
/// 16-byte compare-and-swap, generated once per memory ordering.
///
/// The 128-bit case cannot share `compare_and_swap!`: it needs the *pair* exclusive
/// instructions (`ldxp!` / `stxp!`) and two registers per value, so it gets a dedicated macro.
///
/// `$order` selects the acquire/release flavour of the exclusive pair instructions and `$symbol`
/// is the name of the emitted `extern "C"` function.
macro_rules! compare_and_swap_i128 {
    ($order:ident, $symbol:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub extern "C" fn $symbol (
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                // The register numbers below rely on the AArch64 procedure call standard:
                // `expected` arrives in x0:x1, `desired` in x2:x3 and `ptr` in x4, and the
                // i128 result is handed back in x0:x1.
                //
                // SAFETY: the function is naked, so this asm block is its entire body and
                // returns by itself via `ret`. It only writes x0, x1, x15, x16, x17 and the
                // condition flags.
                // NOTE(review): `ptr` is dereferenced without any check; callers are presumably
                // required to pass a valid, suitably aligned pointer -- confirm.
                unsafe {
                    core::arch::asm!(
                        // Stash `expected` in scratch registers; x0:x1 is about to be
                        // overwritten by the load and doubles as the return value.
                        "mov x16, x0",
                        "mov x17, x1",
                        // Retry point for a failed exclusive store.
                        "0:",
                        // LDXP x0, x1, [x4] -- load the current 128-bit value exclusively.
                        concat!(ldxp!($order), " x0, x1, [x4]"),
                        // Compare both halves: the second compare only runs if the first
                        // matched, otherwise the flags are forced to "not equal".
                        "cmp x0, x16",
                        "ccmp x1, x17, #0, eq",
                        // Mismatch: bail out and return the value that was observed.
                        "bne 1f",
                        // STXP w(tmp2), x2, x3, [x4] -- try to publish `desired`;
                        // w15 receives the exclusive-store status.
                        concat!(stxp!($order), " w15, x2, x3, [x4]"),
                        // Non-zero status: the exclusive store did not go through, start over.
                        "cbnz w15, 0b",
                        "1:",
                        "ret",
                        options(noreturn)
                    )
                }
            }
        }
    };
}
| 174 | + |
140 | 175 |
|
141 | 176 | /// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
|
142 | 177 | macro_rules! swap {
|
@@ -212,10 +247,9 @@ macro_rules! or {
|
212 | 247 | // See `generate_aarch64_outlined_atomics` in build.rs.
|
213 | 248 | include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
|
214 | 249 | foreach_cas!(compare_and_swap);
|
| 250 | +foreach_cas16!(compare_and_swap_i128); |
215 | 251 | foreach_swp!(swap);
|
216 | 252 | foreach_ldadd!(add);
|
217 | 253 | foreach_ldclr!(and);
|
218 | 254 | foreach_ldeor!(xor);
|
219 | 255 | foreach_ldset!(or);
|
220 |
| - |
221 |
| -// TODO: CAS 16 |
|
0 commit comments