diff --git a/Cargo.lock b/Cargo.lock
index 7c8f06a023969..89eb514917d6d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4064,6 +4064,7 @@ name = "rustc_lexer"
 version = "0.0.0"
 dependencies = [
  "expect-test",
+ "memchr",
  "unicode-properties",
  "unicode-xid",
 ]
diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml
index 84b9e29229553..4b3492fdeda25 100644
--- a/compiler/rustc_lexer/Cargo.toml
+++ b/compiler/rustc_lexer/Cargo.toml
@@ -14,6 +14,7 @@ Rust lexer used by rustc. No stability guarantees are provided.
 
 # Note that this crate purposefully does not depend on other rustc crates
 [dependencies]
+memchr = "2.7.4"
 unicode-xid = "0.2.0"
 
 [dependencies.unicode-properties]
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index d173c3ac0327b..e0e3bd0e30b16 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -103,4 +103,11 @@ impl<'a> Cursor<'a> {
             self.bump();
         }
     }
+
+    pub(crate) fn eat_until(&mut self, byte: u8) { // Skips ahead to the first `byte`, leaving it unconsumed.
+        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) { // Byte-level search of `as_str()`.
+            Some(index) => self.as_str()[index..].chars(), // NOTE(review): slicing panics off a char boundary; visible callers pass ASCII.
+            None => "".chars(), // `byte` not found: `chars` becomes an empty iterator.
+        }
+    }
 }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index aa4abf678b9f2..c63ab77decac9 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -483,7 +483,7 @@ impl Cursor<'_> {
             _ => None,
         };
 
-        self.eat_while(|c| c != '\n');
+        self.eat_until(b'\n');
         LineComment { doc_style }
     }
 
@@ -888,7 +888,7 @@ impl Cursor<'_> {
         // Skip the string contents and on each '#' character met, check if this is
         // a raw string termination.
         loop {
-            self.eat_while(|c| c != '"');
+            self.eat_until(b'"');
 
             if self.is_eof() {
                 return Err(RawStrError::NoTerminator {