Skip to content

Commit 85f5dd4

Browse files
committed
Auto merge of rust-lang#50052 - nnethercote:char_lit, r=Mark-Simulacrum
Avoid allocating when parsing \u{...} literals.

`char_lit` uses an allocation in order to ignore '_' chars in \u{...} literals. This patch changes it to not do that by processing the chars more directly.

This improves various rustc-perf benchmark measurements by up to 6%, particularly regex, futures, clap, coercions, hyper, and encoding.

rustc-perf results, on a stage 2 build with jemalloc disabled:

<details>

```
regex-check avg: -5.4% min: -6.5% max: -2.7%
futures-check avg: -3.5% min: -5.3% max: -1.7%
regex-opt avg: -2.0% min: -5.1% max: -0.2%
regex avg: -2.3% min: -5.0% max: -0.6%
futures-opt avg: -3.0% min: -4.8% max: -1.1%
futures avg: -3.1% min: -4.8% max: -1.3%
clap-rs-check avg: -1.8% min: -3.5% max: -0.9%
coercions-check avg: -2.0% min: -3.3% max: -1.0%
hyper-check avg: -2.2% min: -3.1% max: -1.3%
hyper avg: -1.3% min: -2.4% max: -0.3%
hyper-opt avg: -0.9% min: -2.3% max: -0.1%
coercions avg: -1.1% min: -2.2% max: -0.4%
encoding-check avg: -1.7% min: -2.2% max: -0.9%
clap-rs-opt avg: -0.7% min: -2.2% max: 0.0%
coercions-opt avg: -1.2% min: -2.1% max: -0.3%
clap-rs avg: -0.8% min: -1.9% max: -0.4%
encoding-opt avg: -1.0% min: -1.9% max: -0.3%
encoding avg: -1.1% min: -1.9% max: -0.4%
piston-image-check avg: -0.7% min: -1.3% max: -0.3%
inflate-opt avg: -0.3% min: -0.9% max: -0.0%
piston-image avg: -0.3% min: -0.8% max: -0.1%
piston-image-opt avg: -0.3% min: -0.7% max: -0.1%
syn-check avg: -0.3% min: -0.6% max: -0.1%
deep-vector avg: 0.1% min: -0.1% max: 0.5%
syn-opt avg: -0.1% min: -0.4% max: 0.0%
html5ever avg: -0.2% min: -0.4% max: -0.0%
deep-vector-check avg: 0.0% min: -0.3% max: 0.3%
syn avg: -0.2% min: -0.3% max: -0.1%
html5ever-check avg: -0.3% min: -0.3% max: -0.2%
issue-46449-check avg: -0.1% min: -0.2% max: 0.2%
html5ever-opt avg: -0.0% min: -0.2% max: 0.1%
deep-vector-opt avg: -0.0% min: -0.2% max: 0.1%
issue-46449-opt avg: -0.0% min: -0.2% max: 0.1%
unify-linearly-check avg: -0.0% min: -0.2% max: 0.1%
helloworld-check avg: 0.0% min: -0.0% max: 0.2%
parser-check avg: -0.0% min: -0.2% max: 0.0%
inflate avg: 0.0% min: -0.0% max: 0.1%
tokio-webpush-simple-check avg: -0.1% min: -0.1% max: -0.0%
regression-31157-check avg: 0.0% min: -0.1% max: 0.1%
issue-46449 avg: 0.0% min: -0.1% max: 0.1%
tuple-stress-opt avg: 0.0% min: -0.0% max: 0.1%
tuple-stress-check avg: -0.0% min: -0.1% max: 0.1%
tuple-stress avg: 0.0% min: -0.0% max: 0.1%
deeply-nested-check avg: 0.0% min: -0.0% max: 0.1%
regression-31157 avg: -0.0% min: -0.1% max: 0.1%
deeply-nested-opt avg: -0.0% min: -0.1% max: 0.1%
parser-opt avg: -0.0% min: -0.1% max: 0.0%
parser avg: 0.1% min: 0.0% max: 0.1%
tokio-webpush-simple avg: -0.0% min: -0.1% max: 0.1%
regression-31157-opt avg: -0.0% min: -0.1% max: 0.1%
helloworld-opt avg: 0.0% min: -0.0% max: 0.1%
unify-linearly-opt avg: 0.0% min: -0.0% max: 0.1%
unused-warnings-check avg: 0.0% min: 0.0% max: 0.1%
tokio-webpush-simple-opt avg: -0.0% min: -0.1% max: 0.0%
helloworld avg: -0.0% min: -0.0% max: 0.1%
unused-warnings avg: 0.0% min: -0.0% max: 0.0%
deeply-nested avg: -0.0% min: -0.0% max: -0.0%
unused-warnings-opt avg: 0.0% min: -0.0% max: 0.0%
unify-linearly avg: 0.0% min: -0.0% max: 0.0%
inflate-check avg: 0.0% min: -0.0% max: 0.0%
```

</details>
2 parents 1a44439 + 9f14502 commit 85f5dd4

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

src/libsyntax/parse/mod.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,16 @@ pub fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) {
271271
'u' => {
272272
assert_eq!(lit.as_bytes()[2], b'{');
273273
let idx = lit.find('}').unwrap();
274-
let s = &lit[3..idx].chars().filter(|&c| c != '_').collect::<String>();
275-
let v = u32::from_str_radix(&s, 16).unwrap();
274+
275+
// All digits and '_' are ascii, so treat each byte as a char.
276+
let mut v: u32 = 0;
277+
for c in lit[3..idx].bytes() {
278+
let c = char::from(c);
279+
if c != '_' {
280+
let x = c.to_digit(16).unwrap();
281+
v = v.checked_mul(16).unwrap().checked_add(x).unwrap();
282+
}
283+
}
276284
let c = char::from_u32(v).unwrap_or_else(|| {
277285
if let Some((span, diag)) = diag {
278286
let mut diag = diag.struct_span_err(span, "invalid unicode character escape");

0 commit comments

Comments
 (0)