Skip to content

Commit 0ce0fed

Browse files
committed
Auto merge of #81358 - mcastorina:to-upper-lower-speed, r=joshtriplett
Add a check for ASCII characters in `to_upper` and `to_lower`. This extra check has better performance. See discussion here: https://internals.rust-lang.org/t/to-upper-speed/13896 Thanks to `@gilescope` for helping discover and test this.
2 parents 2c74903 + 229fdf8 commit 0ce0fed

File tree

2 files changed

+44
-6
lines changed

2 files changed

+44
-6
lines changed

library/core/benches/char/methods.rs

+30
Original file line number | Diff line number | Diff line change
@@ -45,3 +45,33 @@ fn bench_to_ascii_uppercase(b: &mut Bencher) {
4545
fn bench_to_ascii_lowercase(b: &mut Bencher) {
4646
b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min())
4747
}
48+
49+
#[bench]
50+
fn bench_ascii_mix_to_uppercase(b: &mut Bencher) {
51+
b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
52+
}
53+
54+
#[bench]
55+
fn bench_ascii_mix_to_lowercase(b: &mut Bencher) {
56+
b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
57+
}
58+
59+
#[bench]
60+
fn bench_ascii_char_to_uppercase(b: &mut Bencher) {
61+
b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
62+
}
63+
64+
#[bench]
65+
fn bench_ascii_char_to_lowercase(b: &mut Bencher) {
66+
b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
67+
}
68+
69+
#[bench]
70+
fn bench_non_ascii_char_to_uppercase(b: &mut Bencher) {
71+
b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
72+
}
73+
74+
#[bench]
75+
fn bench_non_ascii_char_to_lowercase(b: &mut Bencher) {
76+
b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
77+
}

library/core/src/unicode/unicode_data.rs

+14-6
Original file line number | Diff line number | Diff line change
@@ -549,16 +549,24 @@ pub mod white_space {
549549
#[rustfmt::skip]
550550
pub mod conversions {
551551
pub fn to_lower(c: char) -> [char; 3] {
552-
match bsearch_case_table(c, LOWERCASE_TABLE) {
553-
None => [c, '\0', '\0'],
554-
Some(index) => LOWERCASE_TABLE[index].1,
552+
if c.is_ascii() {
553+
[(c as u8).to_ascii_lowercase() as char, '\0', '\0']
554+
} else {
555+
match bsearch_case_table(c, LOWERCASE_TABLE) {
556+
None => [c, '\0', '\0'],
557+
Some(index) => LOWERCASE_TABLE[index].1,
558+
}
555559
}
556560
}
557561

558562
pub fn to_upper(c: char) -> [char; 3] {
559-
match bsearch_case_table(c, UPPERCASE_TABLE) {
560-
None => [c, '\0', '\0'],
561-
Some(index) => UPPERCASE_TABLE[index].1,
563+
if c.is_ascii() {
564+
[(c as u8).to_ascii_uppercase() as char, '\0', '\0']
565+
} else {
566+
match bsearch_case_table(c, UPPERCASE_TABLE) {
567+
None => [c, '\0', '\0'],
568+
Some(index) => UPPERCASE_TABLE[index].1,
569+
}
562570
}
563571
}
564572

0 commit comments

Comments
 (0)