Skip to content

Commit dc4a644

Browse files
author
Thom Chiovoloni
committed
Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchmarks into own file
1 parent 13e380d commit dc4a644

File tree

2 files changed

+83
-76
lines changed

2 files changed

+83
-76
lines changed

src/libcore/benches/ascii.rs

+2-76
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
mod is_ascii;
2+
13
// Lower-case ASCII 'a' is the first byte that has its highest bit set
24
// after wrap-adding 0x1F:
35
//
@@ -59,48 +61,6 @@ macro_rules! benches {
5961
)+
6062
}
6163
};
62-
63-
// For some tests the vec allocation tends to dominate, so it can be avoided.
64-
(@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
65-
benches!(@ro mod short_readonly SHORT $($name $arg $body)+);
66-
benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+);
67-
benches!(@ro mod long_readonly LONG $($name $arg $body)+);
68-
// Add another `MEDIUM` bench, but trim the ends so that we can (try to)
69-
// benchmark a case where the function has to handle misalignment.
70-
mod medium_unaligned {
71-
use super::*;
72-
$(
73-
#[bench]
74-
fn $name(bencher: &mut Bencher) {
75-
bencher.bytes = MEDIUM.len() as u64 - 2;
76-
let mut vec = MEDIUM.as_bytes().to_vec();
77-
bencher.iter(|| {
78-
black_box(&mut vec);
79-
let $arg = black_box(&vec[1..(vec.len() - 1)]);
80-
black_box($body)
81-
})
82-
}
83-
)+
84-
}
85-
};
86-
(@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
87-
mod $mod_name {
88-
use super::*;
89-
90-
$(
91-
#[bench]
92-
fn $name(bencher: &mut Bencher) {
93-
bencher.bytes = $input.len() as u64;
94-
let mut vec = $input.as_bytes().to_vec();
95-
bencher.iter(|| {
96-
black_box(&mut vec);
97-
let $arg = black_box(&vec[..]);
98-
black_box($body)
99-
})
100-
}
101-
)+
102-
}
103-
};
10464
}
10565

10666
use test::black_box;
@@ -287,40 +247,6 @@ benches! {
287247
is_ascii_control,
288248
}
289249

290-
// Slice-level `is_ascii` benchmarks. The leading `@readonly` token selects the
// `benches!` arm that binds each function's argument to a `&[u8]` view of the
// input and evaluates the body inside `bencher.iter`.
benches! {
291-
@readonly
292-
// Calls `is_ascii()` directly on the `&[u8]`.
fn is_ascii_slice_libcore(bytes: &[u8]) {
293-
bytes.is_ascii()
294-
}
295-
296-
// Byte-at-a-time check through the iterator API.
fn is_ascii_slice_iter_all(bytes: &[u8]) {
297-
bytes.iter().all(|b| b.is_ascii())
298-
}
299-
300-
// Word-at-a-time check built on `align_to::<usize>()`.
fn is_ascii_slice_align_to(bytes: &[u8]) {
301-
is_ascii_align_to_impl(bytes)
302-
}
303-
}
304-
305-
// Separate since it's easier to debug errors if they don't go through macro
306-
// expansion first.
307-
// Returns `true` when every byte of `bytes` is ASCII.
//
// Slices shorter than one `usize` are checked byte by byte. Longer slices are
// split with `align_to::<usize>()`: the `head` and `tail` bytes are checked
// individually and each word in `body` is tested with `contains_nonascii`.
fn is_ascii_align_to_impl(bytes: &[u8]) -> bool {
308-
if bytes.len() < core::mem::size_of::<usize>() {
309-
return bytes.iter().all(|b| b.is_ascii());
310-
}
311-
// SAFETY: every bit pattern is a valid `usize`, so viewing the aligned middle
// of a `[u8]` as `[usize]` cannot produce an invalid value; `align_to` itself
// decides which bytes are suitably aligned to go into `body`.
312-
let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
313-
head.iter().all(|b| b.is_ascii())
314-
&& body.iter().all(|w| !contains_nonascii(*w))
315-
&& tail.iter().all(|b| b.is_ascii())
316-
}
317-
318-
// Returns `true` if any byte of the word `v` has its high bit (0x80) set,
// i.e. if the word contains at least one non-ASCII byte.
#[inline]
319-
fn contains_nonascii(v: usize) -> bool {
320-
// Written as a 64-bit literal and cast down: where `usize` is narrower than
// 64 bits the `as` cast keeps the low bytes, which are still 0x80 repeated.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
321-
(NONASCII_MASK & v) != 0
322-
}
323-
324250
macro_rules! repeat {
325251
($s: expr) => {
326252
concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)

src/libcore/benches/ascii/is_ascii.rs

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
use super::{LONG, MEDIUM, SHORT};
2+
use test::black_box;
3+
use test::Bencher;
4+
5+
// Generates `#[bench]` functions for predicates over `&[u8]`.
//
// Public form: a list of `fn name(arg: &[u8]) { body }` items. Each item is
// expanded once per generated module, so every case runs against every input.
macro_rules! benches {
6+
// Entry arm: forwards the items to the internal `mod ...` arm, pairing a
// module name with an input constant and the slice range to benchmark.
($( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
7+
benches!(mod short SHORT[..] $($name $arg $body)+);
8+
benches!(mod medium MEDIUM[..] $($name $arg $body)+);
9+
benches!(mod long LONG[..] $($name $arg $body)+);
10+
11+
// Same `MEDIUM` input with the first and/or last byte trimmed off.
// NOTE(review): going by the module names this is meant to exercise
// misaligned starts/ends; the actual alignment depends on where the `Vec`
// buffer lands, so confirm before relying on it.
benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+);
12+
benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+);
13+
benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+);
14+
};
15+
16+
// Internal arm: emits module `$mod_name` holding one `#[bench]` per item.
(mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => {
17+
mod $mod_name {
18+
use super::*;
19+
$(
20+
#[bench]
21+
fn $name(bencher: &mut Bencher) {
22+
// `bytes` is set to the length of the sliced input, not the full constant.
bencher.bytes = $input[$range].len() as u64;
23+
// The input is copied into a `Vec` once, outside the `iter` closure.
let mut vec = $input.as_bytes().to_vec();
24+
bencher.iter(|| {
25+
// Presumably keeps the optimizer from assuming the buffer contents are
// known between iterations -- TODO confirm against `black_box` docs.
black_box(&mut vec);
26+
let $arg = black_box(&vec[$range]);
27+
black_box($body)
28+
})
29+
}
30+
)+
31+
}
32+
};
33+
}
34+
35+
// The benchmarked `is_ascii` strategies. Each body is the expression evaluated
// inside `bencher.iter` for the input slice bound to `bytes`.
// NOTE(review): the `caseNN_` prefixes presumably keep the cases in a fixed
// order in the benchmark output -- confirm.
benches! {
36+
// Calls `is_ascii()` directly on the `&[u8]`.
fn case00_libcore(bytes: &[u8]) {
37+
bytes.is_ascii()
38+
}
39+
40+
// Byte-at-a-time check through the iterator API.
fn case01_iter_all(bytes: &[u8]) {
41+
bytes.iter().all(|b| b.is_ascii())
42+
}
43+
44+
// One `usize` word per step, via `align_to::<usize>()`.
fn case02_align_to(bytes: &[u8]) {
45+
is_ascii_align_to(bytes)
46+
}
47+
48+
// Two `usize` words per step, via `align_to::<[usize; 2]>()`.
fn case03_align_to_unrolled(bytes: &[u8]) {
49+
is_ascii_align_to_unrolled(bytes)
50+
}
51+
}
52+
53+
// These are separate since it's easier to debug errors if they don't go through
54+
// macro expansion first.
55+
// Returns `true` when every byte of `bytes` is ASCII.
//
// Slices shorter than one `usize` are checked byte by byte. Longer slices are
// split with `align_to::<usize>()`: the `head` and `tail` bytes are checked
// individually and each word in `body` is tested with `contains_nonascii`.
fn is_ascii_align_to(bytes: &[u8]) -> bool {
56+
if bytes.len() < core::mem::size_of::<usize>() {
57+
return bytes.iter().all(|b| b.is_ascii());
58+
}
59+
// SAFETY: every bit pattern is a valid `usize`, so viewing the aligned middle
// of a `[u8]` as `[usize]` cannot produce an invalid value; `align_to` itself
// decides which bytes are suitably aligned to go into `body`.
60+
let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
61+
head.iter().all(|b| b.is_ascii())
62+
&& body.iter().all(|w| !contains_nonascii(*w))
63+
&& tail.iter().all(|b| b.is_ascii())
64+
}
65+
66+
// Returns `true` when every byte of `bytes` is ASCII, testing two words per
// step.
//
// Same structure as `is_ascii_align_to`, except the middle of the slice is
// viewed as `[usize; 2]` elements and both words are OR-ed together so that a
// single mask test covers the pair.
//
// NOTE(review): the short-input cutoff is one `usize`, not one `[usize; 2]`.
// A slice shorter than two words still goes through `align_to`; it cannot
// yield a `body` element, so it is handled by the bytewise `head`/`tail`
// checks and the result is still correct.
fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
67+
if bytes.len() < core::mem::size_of::<usize>() {
68+
return bytes.iter().all(|b| b.is_ascii());
69+
}
70+
// SAFETY: every bit pattern is a valid `[usize; 2]`, so viewing the aligned
// middle of a `[u8]` as `[[usize; 2]]` cannot produce an invalid value;
// `align_to` itself decides which bytes are suitably aligned to go into `body`.
71+
let (head, body, tail) = unsafe { bytes.align_to::<[usize; 2]>() };
72+
head.iter().all(|b| b.is_ascii())
73+
// A high bit set in either word survives the OR, so one test suffices.
&& body.iter().all(|w| !contains_nonascii(w[0] | w[1]))
74+
&& tail.iter().all(|b| b.is_ascii())
75+
}
76+
77+
// Returns `true` if any byte of the word `v` has its high bit (0x80) set,
// i.e. if the word contains at least one non-ASCII byte.
#[inline]
78+
fn contains_nonascii(v: usize) -> bool {
79+
// Written as a 64-bit literal and cast down: where `usize` is narrower than
// 64 bits the `as` cast keeps the low bytes, which are still 0x80 repeated.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
80+
(NONASCII_MASK & v) != 0
81+
}

0 commit comments

Comments
 (0)