Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchmarks into their own file

Thom Chiovoloni · Thom Chiovoloni · commit dc4a644980e9 · 2020-07-05T15:52:13.000-07:00
diff --git a/src/libcore/benches/ascii.rs b/src/libcore/benches/ascii.rs
@@ -1,3 +1,5 @@
+mod is_ascii;
+
 // Lower-case ASCII 'a' is the first byte that has its highest bit set
 // after wrap-adding 0x1F:
 //
@@ -59,48 +61,6 @@ macro_rules! benches {
             )+
         }
     };
-
-    // For some tests the vec allocation tends to dominate, so it can be avoided.
-    (@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
-        benches!(@ro mod short_readonly SHORT $($name $arg $body)+);
-        benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+);
-        benches!(@ro mod long_readonly LONG $($name $arg $body)+);
-        // Add another `MEDIUM` bench, but trim the ends so that we can (try to)
-        // benchmark a case where the function has to handle misalignment.
-        mod medium_unaligned {
-            use super::*;
-            $(
-                #[bench]
-                fn $name(bencher: &mut Bencher) {
-                    bencher.bytes = MEDIUM.len() as u64 - 2;
-                    let mut vec = MEDIUM.as_bytes().to_vec();
-                    bencher.iter(|| {
-                        black_box(&mut vec);
-                        let $arg = black_box(&vec[1..(vec.len() - 1)]);
-                        black_box($body)
-                    })
-                }
-            )+
-        }
-    };
-    (@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
-        mod $mod_name {
-            use super::*;
-
-            $(
-                #[bench]
-                fn $name(bencher: &mut Bencher) {
-                    bencher.bytes = $input.len() as u64;
-                    let mut vec = $input.as_bytes().to_vec();
-                    bencher.iter(|| {
-                        black_box(&mut vec);
-                        let $arg = black_box(&vec[..]);
-                        black_box($body)
-                    })
-                }
-            )+
-        }
-    };
 }
 
 use test::black_box;
@@ -287,40 +247,6 @@ benches! {
     is_ascii_control,
 }
 
-benches! {
-    @readonly
-    fn is_ascii_slice_libcore(bytes: &[u8]) {
-        bytes.is_ascii()
-    }
-
-    fn is_ascii_slice_iter_all(bytes: &[u8]) {
-        bytes.iter().all(|b| b.is_ascii())
-    }
-
-    fn is_ascii_slice_align_to(bytes: &[u8]) {
-        is_ascii_align_to_impl(bytes)
-    }
-}
-
-// Separate since it's easier to debug errors if they don't go through macro
-// expansion first.
-fn is_ascii_align_to_impl(bytes: &[u8]) -> bool {
-    if bytes.len() < core::mem::size_of::<usize>() {
-        return bytes.iter().all(|b| b.is_ascii());
-    }
-    // SAFETY: transmuting a sequence of `u8` to `usize` is always fine
-    let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
-    head.iter().all(|b| b.is_ascii())
-        && body.iter().all(|w| !contains_nonascii(*w))
-        && tail.iter().all(|b| b.is_ascii())
-}
-
-#[inline]
-fn contains_nonascii(v: usize) -> bool {
-    const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
-    (NONASCII_MASK & v) != 0
-}
-
 macro_rules! repeat {
     ($s: expr) => {
         concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)
diff --git a/src/libcore/benches/ascii/is_ascii.rs b/src/libcore/benches/ascii/is_ascii.rs
@@ -0,0 +1,81 @@
+use super::{LONG, MEDIUM, SHORT};
+use test::black_box;
+use test::Bencher;
+
+macro_rules! benches {
+    ($( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
+        benches!(mod short SHORT[..] $($name $arg $body)+);
+        benches!(mod medium MEDIUM[..] $($name $arg $body)+);
+        benches!(mod long LONG[..] $($name $arg $body)+);
+
+        benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+);
+        benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+);
+        benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+);
+    };
+
+    (mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => {
+        mod $mod_name {
+            use super::*;
+            $(
+                #[bench]
+                fn $name(bencher: &mut Bencher) {
+                    bencher.bytes = $input[$range].len() as u64; // size of the slice under test
+                    let mut vec = $input.as_bytes().to_vec(); // copied once, outside `iter`
+                    bencher.iter(|| {
+                        black_box(&mut vec);
+                        let $arg = black_box(&vec[$range]);
+                        black_box($body)
+                    })
+                }
+            )+
+        }
+    };
+}
+
+benches! {
+    fn case00_libcore(bytes: &[u8]) {
+        bytes.is_ascii()
+    }
+
+    fn case01_iter_all(bytes: &[u8]) {
+        bytes.iter().all(|b| b.is_ascii())
+    }
+
+    fn case02_align_to(bytes: &[u8]) {
+        is_ascii_align_to(bytes)
+    }
+
+    fn case03_align_to_unrolled(bytes: &[u8]) {
+        is_ascii_align_to_unrolled(bytes)
+    }
+}
+
+// The helpers below are plain functions since it's easier to debug errors if
+// they don't go through macro expansion first.
+fn is_ascii_align_to(bytes: &[u8]) -> bool {
+    if bytes.len() < core::mem::size_of::<usize>() { // shorter than one word: scan bytewise
+        return bytes.iter().all(|b| b.is_ascii());
+    }
+    // SAFETY: every bit pattern of initialized bytes is a valid `usize`.
+    let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
+    head.iter().all(|b| b.is_ascii())
+        && body.iter().all(|w| !contains_nonascii(*w))
+        && tail.iter().all(|b| b.is_ascii())
+}
+
+fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
+    if bytes.len() < core::mem::size_of::<usize>() {
+        return bytes.iter().all(|b| b.is_ascii());
+    }
+    // SAFETY: every bit pattern of initialized bytes is a valid `[usize; 2]`.
+    let (head, body, tail) = unsafe { bytes.align_to::<[usize; 2]>() };
+    head.iter().all(|b| b.is_ascii())
+        && body.iter().all(|w| !contains_nonascii(w[0] | w[1])) // one test per word pair
+        && tail.iter().all(|b| b.is_ascii())
+}
+
+#[inline]
+fn contains_nonascii(v: usize) -> bool {
+    const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize; // high bit of each byte
+    (NONASCII_MASK & v) != 0 // nonzero iff some byte of `v` is >= 0x80
+}