Skip to content

Commit 29a035f

Browse files
authored
Rollup merge of rust-lang#61339 - jridgewell:pointer-alignment, r=BurntSushi
Optimize pointer alignment in utf8 validation

This uses (and reuses) the u8 array's inherent block alignment when checking whether the current index is block aligned.

I initially thought that this would just move the expensive `align_offset` call out of the while loop and replace it with a subtraction and bitwise AND. But it appears this optimizes much better, too...

before: https://rust.godbolt.org/z/WIPvWl
after: https://rust.godbolt.org/z/-jBPoW

## Benchmarks

https://github.com/jridgewell/faster-from_utf8/tree/pointer-alignment

```
test from_utf8_2_bytes_fast ... bench: 310 ns/iter (+/- 42) = 1290 MB/s
test from_utf8_2_bytes_regular ... bench: 309 ns/iter (+/- 24) = 1294 MB/s
test from_utf8_3_bytes_fast ... bench: 1,027 ns/iter (+/- 62) = 1168 MB/s
test from_utf8_3_bytes_regular ... bench: 1,513 ns/iter (+/- 611) = 793 MB/s
test from_utf8_4_bytes_fast ... bench: 1,788 ns/iter (+/- 26) = 1342 MB/s
test from_utf8_4_bytes_regular ... bench: 1,907 ns/iter (+/- 181) = 1258 MB/s
test from_utf8_all_bytes_fast ... bench: 3,463 ns/iter (+/- 97) = 1155 MB/s
test from_utf8_all_bytes_regular ... bench: 4,083 ns/iter (+/- 89) = 979 MB/s
test from_utf8_ascii_fast ... bench: 88 ns/iter (+/- 4) = 28988 MB/s
test from_utf8_ascii_regular ... bench: 88 ns/iter (+/- 8) = 28988 MB/s
test from_utf8_cyr_fast ... bench: 7,707 ns/iter (+/- 531) = 665 MB/s
test from_utf8_cyr_regular ... bench: 8,202 ns/iter (+/- 135) = 625 MB/s
test from_utf8_enwik8_fast ... bench: 1,135,756 ns/iter (+/- 84,450) = 8804 MB/s
test from_utf8_enwik8_regular ... bench: 1,145,468 ns/iter (+/- 79,601) = 8730 MB/s
test from_utf8_jawik10_fast ... bench: 12,723,844 ns/iter (+/- 473,247) = 785 MB/s
test from_utf8_jawik10_regular ... bench: 13,384,596 ns/iter (+/- 666,997) = 747 MB/s
test from_utf8_mixed_fast ... bench: 2,321 ns/iter (+/- 123) = 2081 MB/s
test from_utf8_mixed_regular ... bench: 2,702 ns/iter (+/- 408) = 1788 MB/s
test from_utf8_mostlyasc_fast ... bench: 249 ns/iter (+/- 10) = 14666 MB/s
test from_utf8_mostlyasc_regular ... bench: 276 ns/iter (+/- 5) = 13231 MB/s
```
2 parents 853f300 + 3d2c4ff commit 29a035f

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

src/libcore/str/mod.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,7 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
14221422
let usize_bytes = mem::size_of::<usize>();
14231423
let ascii_block_size = 2 * usize_bytes;
14241424
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1425+
let align = v.as_ptr().align_offset(usize_bytes);
14251426

14261427
while index < len {
14271428
let old_offset = index;
@@ -1501,12 +1502,8 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
15011502
// Ascii case, try to skip forward quickly.
15021503
// When the pointer is aligned, read 2 words of data per iteration
15031504
// until we find a word containing a non-ascii byte.
1504-
let ptr = v.as_ptr();
1505-
let align = unsafe {
1506-
// the offset is safe, because `index` is guaranteed inbounds
1507-
ptr.add(index).align_offset(usize_bytes)
1508-
};
1509-
if align == 0 {
1505+
if align.wrapping_sub(index) % usize_bytes == 0 {
1506+
let ptr = v.as_ptr();
15101507
while index < blocks_end {
15111508
unsafe {
15121509
let block = ptr.add(index) as *const usize;

0 commit comments

Comments
 (0)