Skip to content

Commit 2db0419

Browse files
committed
address review comments
- implement CAS 16 - remove useless commented out symbol name - support `feature("no-asm")` - fix warnings when `feature("c")` is enabled
1 parent ba44f2e commit 2db0419

File tree

4 files changed

+63
-20
lines changed

4 files changed

+63
-20
lines changed

build.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -122,24 +122,28 @@ fn generate_aarch64_outlined_atomics() {
122122
macros.insert(sym, gen_macro(sym));
123123
}
124124

125+
// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
126+
let mut cas16 = gen_macro("cas16");
127+
125128
for ordering in [
126129
Ordering::Relaxed,
127130
Ordering::Acquire,
128131
Ordering::Release,
129132
Ordering::AcqRel,
130133
] {
131134
let sym_ordering = aarch64_symbol(ordering);
132-
// TODO: support CAS 16
133-
for size in [1, 2, 4, 8 /* , 16*/] {
135+
for size in [1, 2, 4, 8] {
134136
for (sym, macro_) in &mut macros {
135137
let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
136138
writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
137139
}
138140
}
141+
let name = format!("__aarch64_cas16_{sym_ordering}");
142+
writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
139143
}
140144

141145
let mut buf = String::new();
142-
for macro_def in macros.values() {
146+
for macro_def in macros.values().chain(std::iter::once(&cas16)) {
143147
buf += macro_def;
144148
buf += "}; }";
145149
}

src/aarch64.rs

+50-16
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,18 @@
1212
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1313
//!
1414
//! Generate functions for each of the following symbols:
15+
//! __aarch64_casM_ORDER
1516
//! __aarch64_swpN_ORDER
1617
//! __aarch64_ldaddN_ORDER
1718
//! __aarch64_ldclrN_ORDER
1819
//! __aarch64_ldeorN_ORDER
1920
//! __aarch64_ldsetN_ORDER
20-
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel }
21-
//!
22-
//! TODO: M = 16
21+
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
2322
//!
2423
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
2524
//! We do something similar, but with macro arguments.
2625
27-
/// We don't do runtime dispatch so we don't have to worry about the global ctor.
28-
/// Apparently MacOS uses a different number of underscores in the symbol name (???)
29-
// #[cfg(target_vendor = "apple")]
30-
// macro_rules! have_lse {
31-
// () => { ___aarch64_have_lse_atomics }
32-
// }
33-
34-
// #[cfg(not(target_vendor = "apple"))]
35-
// macro_rules! have_lse {
36-
// () => { __aarch64_have_lse_atomics }
37-
// }
26+
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
3827

3928
/// Translate a byte size to a Rust type.
4029
macro_rules! int_ty {
@@ -106,6 +95,22 @@ macro_rules! stxr {
10695
($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) }
10796
}
10897

98+
/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction
99+
/// with the correct semantics.
100+
///
101+
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
102+
macro_rules! ldxp {
103+
($ordering:ident) => { concat!("ld", acquire!($ordering), "xp") }
104+
}
105+
106+
/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction
107+
/// with the correct semantics.
108+
///
109+
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
110+
macro_rules! stxp {
111+
($ordering:ident) => { concat!("st", release!($ordering), "xp") }
112+
}
113+
109114
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
110115
macro_rules! compare_and_swap {
111116
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,6 +142,36 @@ macro_rules! compare_and_swap {
137142
}
138143
}
139144

145+
// i128 uses a completely different impl, so it has its own macro.
//
// Generates one naked `extern "C"` 16-byte compare-and-swap function named `$name`, using the
// exclusive-pair load/store instructions selected by `ldxp!`/`stxp!` for `$ordering`.
// The function returns the value that was loaded from `ptr`; `desired` is only stored when that
// loaded value equals `expected`.
//
// NOTE(review): the register names below assume the C ABI passes `expected` in x0:x1,
// `desired` in x2:x3 and `ptr` in x4, and returns the i128 result in x0:x1 — confirm against AAPCS64.
146+
macro_rules! compare_and_swap_i128 {
147+
($ordering:ident, $name:ident) => {
148+
intrinsics! {
149+
#[maybe_use_optimized_c_shim]
150+
#[naked]
151+
pub extern "C" fn $name (
152+
expected: i128, desired: i128, ptr: *mut i128
153+
) -> i128 {
154+
unsafe { core::arch::asm! {
155+
// Copy x0:x1 into x16:x17, because the exclusive load below overwrites x0:x1.
"mov x16, x0",
156+
"mov x17, x1",
157+
// Retry point: re-entered when the exclusive store reports failure.
"0:",
158+
// LDXP x0, x1, [x4]
159+
concat!(ldxp!($ordering), " x0, x1, [x4]"),
160+
// Compare both halves of the loaded value against the saved copy. `ccmp` only performs
// the second comparison when the first one was equal; otherwise it sets the flags to #0,
// which reads as "not equal".
"cmp x0, x16",
161+
"ccmp x1, x17, #0, eq",
162+
// Mismatch: skip the store and return the loaded value left in x0:x1.
"bne 1f",
163+
// STXP w15, x2, x3, [x4]
// w15 receives the store-exclusive status (`tmp2` in the original `lse.S`).
164+
concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
165+
// A non-zero status means the exclusive store did not succeed, so loop and try again.
"cbnz w15, 0b",
166+
"1:",
167+
// The asm performs its own return, hence `options(noreturn)` below.
"ret",
168+
options(noreturn)
169+
} }
170+
}
171+
}
172+
};
173+
}
174+
140175

141176
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
142177
macro_rules! swap {
@@ -212,10 +247,9 @@ macro_rules! or {
212247
// See `generate_aarch64_outlined_atomics` in build.rs.
213248
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
214249
foreach_cas!(compare_and_swap);
250+
foreach_cas16!(compare_and_swap_i128);
215251
foreach_swp!(swap);
216252
foreach_ldadd!(add);
217253
foreach_ldclr!(and);
218254
foreach_ldeor!(xor);
219255
foreach_ldset!(or);
220-
221-
// TODO: CAS 16

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ pub mod mem;
5757
#[cfg(target_arch = "arm")]
5858
pub mod arm;
5959

60-
#[cfg(target_arch = "aarch64")]
60+
#[cfg(all(target_arch = "aarch64", not(feature = "no-asm"), not(feature = "optimized-c")))]
6161
pub mod aarch64;
6262

6363
#[cfg(all(

testcrate/tests/lse.rs

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ mod cas {
3838
}
3939
}
4040

41+
// Adapter for `foreach_cas16!`, whose callback is invoked with only `(ordering, name)`:
// forwards to the three-argument `cas::test!`, supplying the fixed byte size of 16.
macro test_cas16($_ordering:ident, $name:ident) {
42+
cas::test!($_ordering, 16, $name);
43+
}
44+
4145
mod swap {
4246
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
4347
#[test]
@@ -81,6 +85,7 @@ test_op!(xor, std::ops::BitXor::bitxor);
8185
test_op!(or, std::ops::BitOr::bitor);
8286

8387
compiler_builtins::foreach_cas!(cas::test);
88+
compiler_builtins::foreach_cas16!(test_cas16);
8489
compiler_builtins::foreach_swp!(swap::test);
8590
compiler_builtins::foreach_ldadd!(add::test);
8691
compiler_builtins::foreach_ldclr!(clr::test);

0 commit comments

Comments
 (0)