Skip to content

Commit 63d069a

Browse files
committed
Replace calls to core::arch intrinsics with assembly
Some backends may replace calls to `core::arch` intrinsics with multiple calls to `sqrt` [1], which makes the implementation recursive. Help mitigate this by replacing the intrinsic calls with inline assembly. This results in the same assembly as the current implementation when built with optimizations. [1]: rust-lang/compiler-builtins#649
1 parent c9672e5 commit 63d069a

File tree

1 file changed

+20
-15
lines changed

1 file changed

+20
-15
lines changed

src/math/arch/i686.rs

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
11
//! Architecture-specific support for x86-32 and x86-64 with SSE2
22
3-
#[cfg(target_arch = "x86")]
4-
use core::arch::x86::*;
5-
#[cfg(target_arch = "x86_64")]
6-
use core::arch::x86_64::*;
7-
8-
pub fn sqrtf(x: f32) -> f32 {
3+
pub fn sqrtf(mut x: f32) -> f32 {
4+
// SAFETY: `sqrtss` is part of `sse`, which is implied by the `sse2` gate on this module. It has no memory
5+
// access or side effects.
96
unsafe {
10-
let m = _mm_set_ss(x);
11-
let m_sqrt = _mm_sqrt_ss(m);
12-
_mm_cvtss_f32(m_sqrt)
13-
}
7+
core::arch::asm!(
8+
"sqrtss {x}, {x}",
9+
x = inout(xmm_reg) x,
10+
options(nostack, nomem, pure),
11+
)
12+
};
13+
x
1414
}
1515

16-
pub fn sqrt(x: f64) -> f64 {
16+
pub fn sqrt(mut x: f64) -> f64 {
17+
// SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory
18+
// access or side effects.
1719
unsafe {
18-
let m = _mm_set_sd(x);
19-
let m_sqrt = _mm_sqrt_pd(m);
20-
_mm_cvtsd_f64(m_sqrt)
21-
}
20+
core::arch::asm!(
21+
"sqrtsd {x}, {x}",
22+
x = inout(xmm_reg) x,
23+
options(nostack, nomem, pure),
24+
)
25+
};
26+
x
2227
}

0 commit comments

Comments
 (0)