From ec032cb8f704acf28fd80506c6e08f1f762b4267 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Fri, 23 Aug 2019 23:02:20 -0600
Subject: [PATCH 01/15] Put the usual badges in the README

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index e90b42d74..9b80960b7 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # `libm`
 
+[![crates.io](https://img.shields.io/crates/v/libm.svg)](https://crates.io/crates/libm)
+[![docs.rs](https://docs.rs/libm/badge.svg)](https://docs.rs/libm/)
 [![Build Status](https://dev.azure.com/rust-lang/libm/_apis/build/status/rust-lang-nursery.libm?branchName=master)](https://dev.azure.com/rust-lang/libm/_build/latest?definitionId=7&branchName=master)
 
 A port of [MUSL]'s libm to Rust.

From 229a61dc69628a22c1d6f0c649ce1cd35d7e3081 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Fri, 23 Aug 2019 23:03:12 -0600
Subject: [PATCH 02/15] Use hardware intrinsic much more often

---
 src/math/sqrt.rs  | 300 ++++++++++++++++++++++++----------------------
 src/math/sqrtf.rs | 209 +++++++++++++++++---------------
 2 files changed, 268 insertions(+), 241 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 31afe3356..a883c8eb2 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -1,3 +1,53 @@
+/// [Square root](https://en.wikipedia.org/wiki/Square_root) of an `f64`.
+///
+/// This function is intended to exactly match the
+/// [`sqrt`](https://en.cppreference.com/w/c/numeric/math/sqrt) function as
+/// defined by the C/C++ spec.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[allow(unreachable_code)]
+pub fn sqrt(x: f64) -> f64 {
+    // On most targets LLVM will issue a hardware sqrt instruction instead of a
+    // call to this function when you call `core::intrinsics::sqrtf64(x)`.
+    // However, not all targets have hardware sqrt, and also people might end up
+    // calling this function directly, so we must still do our best to get a
+    // hardware instruction used, and then go with a software fallback if
+    // necessary.
+
+    // Nightly: if we're *sure* that LLVM supports a hardware instruction then
+    // we'll call the LLVM intrinsic. We have to be conservative in our
+    // selection for when to do this, because if the intrinsic usage ends up
+    // calling back here it's infinite recursion.
+    #[cfg(all(
+        not(feature = "stable"),
+        any(
+            target_arch = "x86",
+            target_arch = "x86_64",
+            target_arch = "aarch64",
+            target_arch = "wasm32",
+            target_arch = "powerpc64"
+        )
+    ))]
+    {
+        return unsafe { core::intrinsics::sqrtf64(x) };
+    }
+    // Stable: We can use `sse2` if available. As more intrinsic sets stabilize
+    // we can expand this to use hardware more often.
+    #[cfg(target_feature = "sse2")]
+    {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::*;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::*;
+        return unsafe {
+            let m = _mm_set_sd(x);
+            let m_sqrt = _mm_sqrt_pd(m);
+            _mm_cvtsd_f64(m_sqrt)
+        };
+    }
+    // Finally, if we must, we use the software version (below).
+    software_sqrt(x)
+}
+
 /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
 /*
  * ====================================================
@@ -75,167 +125,133 @@
  *      sqrt(-ve) = NaN         ... with invalid signal
  *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
  */
+fn software_sqrt(x: f64) -> f64 {
+    use core::num::Wrapping;
 
-use core::f64;
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrt(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                f64::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf64(x) }
-            }
-        }
-    }
-    #[cfg(target_feature = "sse2")]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse2 is available,
-        // but if someone does end up here they'll apprected the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_sd(x);
-            let m_sqrt = _mm_sqrt_pd(m);
-            _mm_cvtsd_f64(m_sqrt)
-        }
-    }
-    #[cfg(not(target_feature = "sse2"))]
-    {
-        use core::num::Wrapping;
-
-        const TINY: f64 = 1.0e-300;
+    const TINY: f64 = 1.0e-300;
 
-        let mut z: f64;
-        let sign: Wrapping<u32> = Wrapping(0x80000000);
-        let mut ix0: i32;
-        let mut s0: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: Wrapping<u32>;
-        let mut t1: Wrapping<u32>;
-        let mut s1: Wrapping<u32>;
-        let mut ix1: Wrapping<u32>;
-        let mut q1: Wrapping<u32>;
+    let mut z: f64;
+    let sign: Wrapping<u32> = Wrapping(0x80000000);
+    let mut ix0: i32;
+    let mut s0: i32;
+    let mut q: i32;
+    let mut m: i32;
+    let mut t: i32;
+    let mut i: i32;
+    let mut r: Wrapping<u32>;
+    let mut t1: Wrapping<u32>;
+    let mut s1: Wrapping<u32>;
+    let mut ix1: Wrapping<u32>;
+    let mut q1: Wrapping<u32>;
 
-        ix0 = (x.to_bits() >> 32) as i32;
-        ix1 = Wrapping(x.to_bits() as u32);
+    ix0 = (x.to_bits() >> 32) as i32;
+    ix1 = Wrapping(x.to_bits() as u32);
 
-        /* take care of Inf and NaN */
-        if (ix0 & 0x7ff00000) == 0x7ff00000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    /* take care of Inf and NaN */
+    if (ix0 & 0x7ff00000) == 0x7ff00000 {
+        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    }
+    /* take care of zero */
+    if ix0 <= 0 {
+        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
+            return x; /* sqrt(+-0) = +-0 */
         }
-        /* take care of zero */
-        if ix0 <= 0 {
-            if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix0 < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
+        if ix0 < 0 {
+            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
         }
-        /* normalize x */
-        m = ix0 >> 20;
-        if m == 0 {
-            /* subnormal x */
-            while ix0 == 0 {
-                m -= 21;
-                ix0 |= (ix1 >> 11).0 as i32;
-                ix1 <<= 21;
-            }
-            i = 0;
-            while (ix0 & 0x00100000) == 0 {
-                i += 1;
-                ix0 <<= 1;
-            }
-            m -= i - 1;
-            ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-            ix1 = ix1 << i as usize;
+    }
+    /* normalize x */
+    m = ix0 >> 20;
+    if m == 0 {
+        /* subnormal x */
+        while ix0 == 0 {
+            m -= 21;
+            ix0 |= (ix1 >> 11).0 as i32;
+            ix1 <<= 21;
         }
-        m -= 1023; /* unbias exponent */
-        ix0 = (ix0 & 0x000fffff) | 0x00100000;
-        if (m & 1) == 1 {
-            /* odd m, double x to make it even */
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
+        i = 0;
+        while (ix0 & 0x00100000) == 0 {
+            i += 1;
+            ix0 <<= 1;
         }
-        m >>= 1; /* m = [m/2] */
-
-        /* generate sqrt(x) bit by bit */
+        m -= i - 1;
+        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
+        ix1 = ix1 << i as usize;
+    }
+    m -= 1023; /* unbias exponent */
+    ix0 = (ix0 & 0x000fffff) | 0x00100000;
+    if (m & 1) == 1 {
+        /* odd m, double x to make it even */
         ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
         ix1 += ix1;
-        q = 0; /* [q,q1] = sqrt(x) */
-        q1 = Wrapping(0);
-        s0 = 0;
-        s1 = Wrapping(0);
-        r = Wrapping(0x00200000); /* r = moving bit from right to left */
+    }
+    m >>= 1; /* m = [m/2] */
 
-        while r != Wrapping(0) {
-            t = s0 + r.0 as i32;
-            if t <= ix0 {
-                s0 = t + r.0 as i32;
-                ix0 -= t;
-                q += r.0 as i32;
-            }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
+    /* generate sqrt(x) bit by bit */
+    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+    ix1 += ix1;
+    q = 0; /* [q,q1] = sqrt(x) */
+    q1 = Wrapping(0);
+    s0 = 0;
+    s1 = Wrapping(0);
+    r = Wrapping(0x00200000); /* r = moving bit from right to left */
+
+    while r != Wrapping(0) {
+        t = s0 + r.0 as i32;
+        if t <= ix0 {
+            s0 = t + r.0 as i32;
+            ix0 -= t;
+            q += r.0 as i32;
         }
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        r >>= 1;
+    }
 
-        r = sign;
-        while r != Wrapping(0) {
-            t1 = s1 + r;
-            t = s0;
-            if t < ix0 || (t == ix0 && t1 <= ix1) {
-                s1 = t1 + r;
-                if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
-                    s0 += 1;
-                }
-                ix0 -= t;
-                if ix1 < t1 {
-                    ix0 -= 1;
-                }
-                ix1 -= t1;
-                q1 += r;
+    r = sign;
+    while r != Wrapping(0) {
+        t1 = s1 + r;
+        t = s0;
+        if t < ix0 || (t == ix0 && t1 <= ix1) {
+            s1 = t1 + r;
+            if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
+                s0 += 1;
             }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
+            ix0 -= t;
+            if ix1 < t1 {
+                ix0 -= 1;
+            }
+            ix1 -= t1;
+            q1 += r;
         }
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        r >>= 1;
+    }
 
-        /* use floating add to find out rounding direction */
-        if (ix0 as u32 | ix1.0) != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if q1.0 == 0xffffffff {
-                    q1 = Wrapping(0);
+    /* use floating add to find out rounding direction */
+    if (ix0 as u32 | ix1.0) != 0 {
+        z = 1.0 - TINY; /* raise inexact flag */
+        if z >= 1.0 {
+            z = 1.0 + TINY;
+            if q1.0 == 0xffffffff {
+                q1 = Wrapping(0);
+                q += 1;
+            } else if z > 1.0 {
+                if q1.0 == 0xfffffffe {
                     q += 1;
-                } else if z > 1.0 {
-                    if q1.0 == 0xfffffffe {
-                        q += 1;
-                    }
-                    q1 += Wrapping(2);
-                } else {
-                    q1 += q1 & Wrapping(1);
                 }
+                q1 += Wrapping(2);
+            } else {
+                q1 += q1 & Wrapping(1);
             }
         }
-        ix0 = (q >> 1) + 0x3fe00000;
-        ix1 = q1 >> 1;
-        if (q & 1) == 1 {
-            ix1 |= sign;
-        }
-        ix0 += m << 20;
-        f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
     }
+    ix0 = (q >> 1) + 0x3fe00000;
+    ix1 = q1 >> 1;
+    if (q & 1) == 1 {
+        ix1 |= sign;
+    }
+    ix0 += m << 20;
+    f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
 }
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 1d5b78e84..41ac09875 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -1,3 +1,46 @@
+/// [Square root](https://en.wikipedia.org/wiki/Square_root) of an `f32`.
+///
+/// This function is intended to exactly match the
+/// [`sqrtf`](https://en.cppreference.com/w/c/numeric/math/sqrt) function as
+/// defined by the C/C++ spec.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[allow(unreachable_code)]
+pub fn sqrtf(x: f32) -> f32 {
+    // See the notes in the `sqrt` function for an explanation of what's going on
+    // here.
+
+    // Nightly
+    #[cfg(all(
+        not(feature = "stable"),
+        any(
+            target_arch = "x86",
+            target_arch = "x86_64",
+            target_arch = "arm",
+            target_arch = "aarch64",
+            target_arch = "wasm32",
+            target_arch = "powerpc64"
+        )
+    ))]
+    {
+        return unsafe { core::intrinsics::sqrtf64(x) };
+    }
+    // Stable
+    #[cfg(target_feature = "sse2")]
+    {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::*;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::*;
+        return unsafe {
+            let m = _mm_set_ss(x);
+            let m_sqrt = _mm_sqrt_ss(m);
+            _mm_cvtss_f32(m_sqrt)
+        };
+    }
+    // Fallback
+    software_sqrtf(x)
+}
+
 /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
 /*
  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
@@ -12,118 +55,86 @@
  * is preserved.
  * ====================================================
  */
+fn software_sqrtf(x: f32) -> f32 {
+    const TINY: f32 = 1.0e-30;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                ::core::f32::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf32(x) }
-            }
-        }
-    }
-    #[cfg(target_feature = "sse")]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse is available,
-        // but if someone does end up here they'll apprected the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_ss(x);
-            let m_sqrt = _mm_sqrt_ss(m);
-            _mm_cvtss_f32(m_sqrt)
-        }
-    }
-    #[cfg(not(target_feature = "sse"))]
-    {
-        const TINY: f32 = 1.0e-30;
+    let mut z: f32;
+    let sign: i32 = 0x80000000u32 as i32;
+    let mut ix: i32;
+    let mut s: i32;
+    let mut q: i32;
+    let mut m: i32;
+    let mut t: i32;
+    let mut i: i32;
+    let mut r: u32;
 
-        let mut z: f32;
-        let sign: i32 = 0x80000000u32 as i32;
-        let mut ix: i32;
-        let mut s: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: u32;
+    ix = x.to_bits() as i32;
 
-        ix = x.to_bits() as i32;
+    /* take care of Inf and NaN */
+    if (ix as u32 & 0x7f800000) == 0x7f800000 {
+        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    }
 
-        /* take care of Inf and NaN */
-        if (ix as u32 & 0x7f800000) == 0x7f800000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    /* take care of zero */
+    if ix <= 0 {
+        if (ix & !sign) == 0 {
+            return x; /* sqrt(+-0) = +-0 */
         }
-
-        /* take care of zero */
-        if ix <= 0 {
-            if (ix & !sign) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
+        if ix < 0 {
+            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
         }
+    }
 
-        /* normalize x */
-        m = ix >> 23;
-        if m == 0 {
-            /* subnormal x */
-            i = 0;
-            while ix & 0x00800000 == 0 {
-                ix <<= 1;
-                i = i + 1;
-            }
-            m -= i - 1;
-        }
-        m -= 127; /* unbias exponent */
-        ix = (ix & 0x007fffff) | 0x00800000;
-        if m & 1 == 1 {
-            /* odd m, double x to make it even */
-            ix += ix;
+    /* normalize x */
+    m = ix >> 23;
+    if m == 0 {
+        /* subnormal x */
+        i = 0;
+        while ix & 0x00800000 == 0 {
+            ix <<= 1;
+            i = i + 1;
         }
-        m >>= 1; /* m = [m/2] */
-
-        /* generate sqrt(x) bit by bit */
+        m -= i - 1;
+    }
+    m -= 127; /* unbias exponent */
+    ix = (ix & 0x007fffff) | 0x00800000;
+    if m & 1 == 1 {
+        /* odd m, double x to make it even */
         ix += ix;
-        q = 0;
-        s = 0;
-        r = 0x01000000; /* r = moving bit from right to left */
+    }
+    m >>= 1; /* m = [m/2] */
 
-        while r != 0 {
-            t = s + r as i32;
-            if t <= ix {
-                s = t + r as i32;
-                ix -= t;
-                q += r as i32;
-            }
-            ix += ix;
-            r >>= 1;
+    /* generate sqrt(x) bit by bit */
+    ix += ix;
+    q = 0;
+    s = 0;
+    r = 0x01000000; /* r = moving bit from right to left */
+
+    while r != 0 {
+        t = s + r as i32;
+        if t <= ix {
+            s = t + r as i32;
+            ix -= t;
+            q += r as i32;
         }
+        ix += ix;
+        r >>= 1;
+    }
 
-        /* use floating add to find out rounding direction */
-        if ix != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if z > 1.0 {
-                    q += 2;
-                } else {
-                    q += q & 1;
-                }
+    /* use floating add to find out rounding direction */
+    if ix != 0 {
+        z = 1.0 - TINY; /* raise inexact flag */
+        if z >= 1.0 {
+            z = 1.0 + TINY;
+            if z > 1.0 {
+                q += 2;
+            } else {
+                q += q & 1;
             }
         }
-
-        ix = (q >> 1) + 0x3f000000;
-        ix += m << 23;
-        f32::from_bits(ix as u32)
     }
+
+    ix = (q >> 1) + 0x3f000000;
+    ix += m << 23;
+    f32::from_bits(ix as u32)
 }

From 3894b7972d9db8170d3fe5ea780c6df135cc7f13 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Sun, 25 Aug 2019 10:01:46 -0600
Subject: [PATCH 03/15] Update sqrt.rs

Change per the updated info in https://github.com/rust-lang/rust/pull/63455#issuecomment-524638624
---
 src/math/sqrt.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index a883c8eb2..847ef795c 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -20,8 +20,8 @@ pub fn sqrt(x: f64) -> f64 {
     #[cfg(all(
         not(feature = "stable"),
         any(
-            target_arch = "x86",
-            target_arch = "x86_64",
+            all(target_arch = "x86", not(target_feature="soft_float")),
+            all(target_arch = "x86_64", not(target_feature="soft_float")),
             target_arch = "aarch64",
             target_arch = "wasm32",
             target_arch = "powerpc64"

From a78db4a6ea9c04c08f4e04202592241e05816c8a Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Sun, 25 Aug 2019 10:03:00 -0600
Subject: [PATCH 04/15] Update sqrtf.rs

Change per the updated info in https://github.com/rust-lang/rust/pull/63455#issuecomment-524638624
---
 src/math/sqrtf.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 41ac09875..e28ad3d67 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -13,9 +13,8 @@ pub fn sqrtf(x: f32) -> f32 {
     #[cfg(all(
         not(feature = "stable"),
         any(
-            target_arch = "x86",
-            target_arch = "x86_64",
-            target_arch = "arm",
+            all(target_arch = "x86", not(target_feature="soft_float")),
+            all(target_arch = "x86_64", not(target_feature="soft_float")),
             target_arch = "aarch64",
             target_arch = "wasm32",
             target_arch = "powerpc64"

From f2a29beb0d3c5ccd32c3183929d53cf55087349e Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Sun, 25 Aug 2019 10:08:15 -0600
Subject: [PATCH 05/15] types are important

---
 src/math/sqrtf.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index e28ad3d67..7dd85ce77 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -21,7 +21,7 @@ pub fn sqrtf(x: f32) -> f32 {
         )
     ))]
     {
-        return unsafe { core::intrinsics::sqrtf64(x) };
+        return unsafe { core::intrinsics::sqrtf32(x) };
     }
     // Stable
     #[cfg(target_feature = "sse2")]

From d81d027eb8ea0821318a5717d71e3feb76e176e3 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Sun, 25 Aug 2019 18:58:24 -0600
Subject: [PATCH 06/15] formatting fix

---
 src/math/sqrt.rs  | 4 ++--
 src/math/sqrtf.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 847ef795c..4772a67e1 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -20,8 +20,8 @@ pub fn sqrt(x: f64) -> f64 {
     #[cfg(all(
         not(feature = "stable"),
         any(
-            all(target_arch = "x86", not(target_feature="soft_float")),
-            all(target_arch = "x86_64", not(target_feature="soft_float")),
+            all(target_arch = "x86", not(target_feature = "soft_float")),
+            all(target_arch = "x86_64", not(target_feature = "soft_float")),
             target_arch = "aarch64",
             target_arch = "wasm32",
             target_arch = "powerpc64"
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 7dd85ce77..72c136e75 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -13,8 +13,8 @@ pub fn sqrtf(x: f32) -> f32 {
     #[cfg(all(
         not(feature = "stable"),
         any(
-            all(target_arch = "x86", not(target_feature="soft_float")),
-            all(target_arch = "x86_64", not(target_feature="soft_float")),
+            all(target_arch = "x86", not(target_feature = "soft_float")),
+            all(target_arch = "x86_64", not(target_feature = "soft_float")),
             target_arch = "aarch64",
             target_arch = "wasm32",
             target_arch = "powerpc64"

From 0c504f8245fcc21acad22de7a5d112dcc9a0f7f9 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Sun, 25 Aug 2019 19:05:21 -0600
Subject: [PATCH 07/15] make core_intrinsics always available on Nightly

Also tell clippy to hush about some things.
---
 src/lib.rs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index b15857dbe..3debc5f49 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,10 +1,9 @@
 //! libm in pure Rust
 #![deny(warnings)]
 #![no_std]
-#![cfg_attr(
-    all(target_arch = "wasm32", not(feature = "stable")),
-    feature(core_intrinsics)
-)]
+#![cfg_attr(not(feature = "stable"), feature(core_intrinsics))]
+#![allow(clippy::unreadable_literal)]
+#![allow(clippy::many_single_char_names)]
 
 mod math;
 

From 162148adfb63b39fd1dc184316a0262a1746d998 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Fri, 30 Aug 2019 01:10:24 -0600
Subject: [PATCH 08/15] flip the stable/unstable meaning

Closes https://github.com/rust-lang-nursery/libm/issues/212
---
 Cargo.toml                   | 12 ++++++++----
 crates/libm-bench/Cargo.toml |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 3e6817851..a8c0e629b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,9 +11,13 @@ version = "0.2.0"
 edition = "2018"
 
 [features]
-# only used to run our test suite
-default = ['stable']
-stable = []
+default = []
+
+# If you enable this feature then the crate will assume that you're building
+# using a Nightly compiler. This allows us to take advantage of unstable
+# features, some of which might never become stable (eg: calling some of the
+# LLVM intrinsics).
+unstable = []
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
@@ -30,7 +34,7 @@ members = [
 ]
 
 [dev-dependencies]
-no-panic = "0.1.8"
+no-panic = "0.1.11"
 
 [build-dependencies]
 rand = { version = "0.6.5", optional = true }
diff --git a/crates/libm-bench/Cargo.toml b/crates/libm-bench/Cargo.toml
index ba65dbd5f..b09db339b 100644
--- a/crates/libm-bench/Cargo.toml
+++ b/crates/libm-bench/Cargo.toml
@@ -12,4 +12,4 @@ paste = "0.1.5"
 
 [features]
 default = []
-stable = [ "libm/stable" ]
+unstable = [ "libm/unstable" ]

From 80665dc0ad51c6355e2f23fd43fa0e8d8de1fbcf Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 11:02:23 -0600
Subject: [PATCH 09/15] update for the new "unstable" feature.

---
 src/lib.rs        | 5 +----
 src/math/sqrt.rs  | 2 +-
 src/math/sqrtf.rs | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index e228af9b3..0727c8e5f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,10 +1,7 @@
 //! libm in pure Rust
 #![deny(warnings)]
 #![no_std]
-#![cfg_attr(
-    all(target_arch = "wasm32", feature = "unstable"),
-    feature(core_intrinsics)
-)]
+#![cfg_attr(feature = "unstable", feature(core_intrinsics))]
 #![allow(clippy::unreadable_literal)]
 #![allow(clippy::many_single_char_names)]
 #![allow(clippy::needless_return)]
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 0564c3122..8a468b130 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -18,7 +18,7 @@ pub fn sqrt(x: f64) -> f64 {
     // selection for when to do this, because if the intrinsic usage ends up
     // calling back here it's infinite recursion.
     #[cfg(all(
-        not(feature = "stable"),
+        feature = "unstable",
         any(
             all(target_arch = "x86", not(target_feature = "soft_float")),
             all(target_arch = "x86_64", not(target_feature = "soft_float")),
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 78eae3861..d0bb7101e 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -11,7 +11,7 @@ pub fn sqrtf(x: f32) -> f32 {
 
     // Nightly
     #[cfg(all(
-        not(feature = "stable"),
+        feature = "unstable",
         any(
             all(target_arch = "x86", not(target_feature = "soft_float")),
             all(target_arch = "x86_64", not(target_feature = "soft_float")),

From 719c6b72ae018759516ddd9535414e48462f7dc2 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 11:09:38 -0600
Subject: [PATCH 10/15] fix the soft-float feature name

---
 src/math/sqrtf.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index d0bb7101e..cf9a94376 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -13,8 +13,8 @@ pub fn sqrtf(x: f32) -> f32 {
     #[cfg(all(
         feature = "unstable",
         any(
-            all(target_arch = "x86", not(target_feature = "soft_float")),
-            all(target_arch = "x86_64", not(target_feature = "soft_float")),
+            all(target_arch = "x86", not(target_feature = "soft-float")),
+            all(target_arch = "x86_64", not(target_feature = "soft-float")),
             target_arch = "aarch64",
             target_arch = "wasm32",
             target_arch = "powerpc64"

From 321eeb8039725a8dc0a25333e06ed9c0b028975f Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 11:30:59 -0600
Subject: [PATCH 11/15] update when hardware is assumed to have sqrt support

---
 src/math/sqrt.rs  | 11 +++++++----
 src/math/sqrtf.rs |  7 +++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 8a468b130..f944710b3 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -20,11 +20,14 @@ pub fn sqrt(x: f64) -> f64 {
     #[cfg(all(
         feature = "unstable",
         any(
-            all(target_arch = "x86", not(target_feature = "soft_float")),
-            all(target_arch = "x86_64", not(target_feature = "soft_float")),
-            target_arch = "aarch64",
+            all(target_arch = "x86", not(target_feature = "soft-float")),
+            all(target_arch = "x86_64", not(target_feature = "soft-float")),
+            all(target_arch = "arm", not(target_feature = "soft-float")),
+            all(target_arch = "aarch64", not(target_feature = "soft-float")),
+            all(target_arch = "powerpc", target_feature = "hard-float"),
+            all(target_arch = "powerpc64", target_feature = "hard-float"),
+            all(any(target_arch = "riscv32", target_arch = "riscv64"), target_feature = "d"),
             target_arch = "wasm32",
-            target_arch = "powerpc64"
         )
     ))]
     {
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index cf9a94376..04702ff47 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -15,9 +15,12 @@ pub fn sqrtf(x: f32) -> f32 {
         any(
             all(target_arch = "x86", not(target_feature = "soft-float")),
             all(target_arch = "x86_64", not(target_feature = "soft-float")),
-            target_arch = "aarch64",
+            all(target_arch = "arm", not(target_feature = "soft-float")),
+            all(target_arch = "aarch64", not(target_feature = "soft-float")),
+            all(target_arch = "powerpc", target_feature = "hard-float"),
+            all(target_arch = "powerpc64", target_feature = "hard-float"),
+            all(any(target_arch = "riscv32", target_arch = "riscv64"), target_feature = "f"),
             target_arch = "wasm32",
-            target_arch = "powerpc64"
         )
     ))]
     {

From d59b2d4bf3ba354b4b77b0358fbe4e5bc2707dd9 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 11:47:04 -0600
Subject: [PATCH 12/15] narrow down the panic check

CI thinks that there's a panic in here, so let's see if it thinks the panic is in the software part or the intrinsics part. Sadly, I don't have the arm7 stuff set up on my home machine, so we're just letting the CI do this.
---
 src/math/sqrtf.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 04702ff47..b43e797dc 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -3,7 +3,6 @@
 /// This function is intended to exactly match the
 /// [`sqrtf`](https://en.cppreference.com/w/c/numeric/math/sqrt) function as
 /// defined by the C/C++ spec.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 #[allow(unreachable_code)]
 pub fn sqrtf(x: f32) -> f32 {
     // See the notes in the `sqrt` function for an explanation of what's going on
@@ -57,6 +56,7 @@ pub fn sqrtf(x: f32) -> f32 {
  * is preserved.
  * ====================================================
  */
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 fn software_sqrtf(x: f32) -> f32 {
     const TINY: f32 = 1.0e-30;
 

From 04134b90670ef1cae5fb3e64c6551e3e28900b9c Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 11:53:47 -0600
Subject: [PATCH 13/15] move the panic check to just our software func

---
 src/math/sqrt.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index f944710b3..e7454dda3 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -3,7 +3,6 @@
 /// This function is intended to exactly match the
 /// [`sqrt`](https://en.cppreference.com/w/c/numeric/math/sqrt) function as
 /// defined by the C/C++ spec.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 #[allow(unreachable_code)]
 pub fn sqrt(x: f64) -> f64 {
     // On most targets LLVM will issue a hardware sqrt instruction instead of a
@@ -128,6 +127,7 @@ pub fn sqrt(x: f64) -> f64 {
  *      sqrt(-ve) = NaN         ... with invalid signal
  *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
  */
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 fn software_sqrt(x: f64) -> f64 {
     use core::num::Wrapping;
 

From 0fb136df272196ee88ed309c2c78e3388508da92 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 12:08:30 -0600
Subject: [PATCH 14/15] downgrade no-panic version to 0.1.8

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 3b04a4432..37aff9a76 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,7 +28,7 @@ members = [
 ]
 
 [dev-dependencies]
-no-panic = "0.1.11"
+no-panic = "0.1.8"
 
 [build-dependencies]
 rand = { version = "0.6.5", optional = true }

From 02e2eee9876c29cc93570d76cee91bb95c430715 Mon Sep 17 00:00:00 2001
From: Lokathor <zefria@gmail.com>
Date: Wed, 11 Sep 2019 12:25:17 -0600
Subject: [PATCH 15/15] on ARM, f64 doesn't always have hardware sqrt

---
 src/math/sqrt.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index e7454dda3..b2bcfbd2a 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -21,7 +21,6 @@ pub fn sqrt(x: f64) -> f64 {
         any(
             all(target_arch = "x86", not(target_feature = "soft-float")),
             all(target_arch = "x86_64", not(target_feature = "soft-float")),
-            all(target_arch = "arm", not(target_feature = "soft-float")),
             all(target_arch = "aarch64", not(target_feature = "soft-float")),
             all(target_arch = "powerpc", target_feature = "hard-float"),
             all(target_arch = "powerpc64", target_feature = "hard-float"),